mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
253 stars · 100 forks · source link

GROWTH-143 Create new desktop conversion event table #5733

Status: Closed — kwindau closed this 4 months ago

kwindau commented 4 months ago

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 4 months ago

Integration report for "GROWTH-143 initial commit"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 17:17:45.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 17:19:12.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 17:19:13.000000000 +0000 @@ -0,0 +1,113 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 17:17:45.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 17:19:09.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + 
external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_kpi_metrics.py /tmp/workspace/generated-sql/dags/bqetl_mobile_kpi_metrics.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_kpi_metrics.py 2024-06-04 17:17:45.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_kpi_metrics.py 2024-06-04 17:19:13.000000000 +0000 @@ -30,7 +30,7 @@ default_args = { "owner": "kik@mozilla.com", - "start_date": datetime.datetime(2024, 6, 3, 0, 0), + "start_date": datetime.datetime(2024, 6, 8, 0, 0), "end_date": None, "email": ["kik@mozilla.com", "telemetry-alerts@mozilla.com"], "depends_on_past": False, Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 17:15:55.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion 
Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 17:14:14.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 17:14:14.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 17:15:55.000000000 +0000 @@ -0,0 +1,28 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 17:14:14.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of 
data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = DATE_SUB(@submission_date, INTERVAL 14 DAY) + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 15 DAY) + AND DATE_ADD(@submission_date, INTERVAL 15 DAY) +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN 
submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 17:14:14.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: 
event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Merge branch 'main' into dsktp-conv-evnts"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 17:21:39.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 17:22:37.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 17:22:38.000000000 +0000 @@ -0,0 +1,113 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 17:21:39.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 17:22:34.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + 
external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_ios_derived/retention_v1/backfill.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_ios_derived/retention_v1/backfill.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_ios_derived/retention_v1/backfill.yaml 2024-06-04 17:18:02.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_ios_derived/retention_v1/backfill.yaml 2024-06-04 17:18:06.000000000 +0000 @@ -4,4 +4,4 @@ reason: The table is created, this is to populate it with data. 
watchers: - kik@mozilla.com - status: Complete + status: Initiate diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 17:19:48.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 17:18:06.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 17:18:06.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 17:19:51.000000000 +0000 @@ -0,0 +1,28 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 17:18:06.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = DATE_SUB(@submission_date, INTERVAL 14 DAY) + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less 
data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 15 DAY) + AND DATE_ADD(@submission_date, INTERVAL 15 DAY) +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. 
+ SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 
00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 17:18:06.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Merge branch 'main' into dsktp-conv-evnts"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 17:22:12.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 17:23:35.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 17:23:36.000000000 +0000 @@ -0,0 +1,113 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 17:22:12.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 17:23:33.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + 
external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 17:20:36.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 17:18:50.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 17:18:50.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 17:20:38.000000000 +0000 @@ -0,0 +1,28 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: 
kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 17:18:50.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = DATE_SUB(@submission_date, INTERVAL 14 DAY) + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS 
( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 15 DAY) + AND DATE_ADD(@submission_date, INTERVAL 15 DAY) +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. 
+ SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 
00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 17:18:50.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Set the offset in the parameter itself"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:00:59.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:02:27.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 18:02:28.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["submission_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["submission_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
18:00:59.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 18:02:25.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 17:59:37.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 17:57:43.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 17:57:43.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 17:59:37.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: submission_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 17:57:43.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @submission_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@submission_date, INTERVAL 29 DAY) --29 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) 
AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 17:57:43.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml 2024-06-04 17:57:36.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml 2024-06-04 17:57:43.000000000 +0000 @@ -6,4 +6,4 @@ - mhirose@mozilla.com - anicholson@mozilla.com - wichan@mozilla.com - status: Complete + status: Initiate ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Update metadata.yaml"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:02:39.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:04:19.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 18:04:20.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["first_seen_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["first_seen_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
18:02:39.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 18:04:15.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 18:00:53.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 17:58:58.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 17:58:58.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 18:00:53.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: first_seen_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 17:58:58.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @submission_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@submission_date, INTERVAL 29 DAY) --29 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) 
AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 17:58:58.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml 2024-06-04 17:59:01.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_retention_v1/backfill.yaml 2024-06-04 17:58:58.000000000 +0000 @@ -6,4 +6,4 @@ - mhirose@mozilla.com - anicholson@mozilla.com - wichan@mozilla.com - status: Complete + status: Initiate ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Merge branch 'main' into dsktp-conv-evnts"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:29:23.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:30:53.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 18:30:54.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["first_seen_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["first_seen_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
18:29:23.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 18:30:50.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 18:27:54.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 18:25:59.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 18:25:59.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 18:27:54.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: first_seen_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 18:25:59.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @submission_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@submission_date, INTERVAL 29 DAY) --29 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) 
AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 18:25:59.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Change to partition on report date"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:42:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:42:49.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 18:42:50.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
18:42:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 18:42:47.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 18:40:05.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 18:38:16.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 18:38:16.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 18:40:09.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: report_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 18:38:16.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @submission_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@submission_date, INTERVAL 29 DAY) --29 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) 
AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 18:38:16.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Merge branch 'main' into dsktp-conv-evnts"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:51:02.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:51:25.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 18:51:26.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["first_seen_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["first_seen_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
18:51:02.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 18:51:23.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 18:48:43.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 18:47:07.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 18:47:07.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 18:48:43.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: first_seen_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 18:47:07.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @submission_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@submission_date, INTERVAL 29 DAY) --29 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first 7 days of main pings + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) 
AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 18:47:07.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Fix comment on country"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:51:03.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 18:53:13.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 18:53:14.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
18:51:03.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 18:53:10.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 18:50:05.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 18:48:05.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 18:48:05.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 18:50:05.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: report_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 18:48:05.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @submission_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@submission_date, INTERVAL 29 DAY) --29 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END 
+ ) AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 18:48:05.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "update submission date to report date"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 19:50:36.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 19:52:20.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 19:52:21.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
19:50:36.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 19:52:18.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_unified.py /tmp/workspace/generated-sql/dags/bqetl_unified.py --- /tmp/workspace/main-generated-sql/dags/bqetl_unified.py 2024-06-04 19:50:36.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_unified.py 2024-06-04 19:52:19.000000000 +0000 @@ -188,6 +188,13 @@ execution_date="{{ (execution_date - macros.timedelta(seconds=10800)).isoformat() }}", ) + ExternalTaskMarker( + task_id="kpi_forecasting__wait_for_unified_metrics", + external_dag_id="kpi_forecasting", + external_task_id="wait_for_unified_metrics", + execution_date="{{ (execution_date + macros.timedelta(seconds=3600)).isoformat() }}", + ) + checks__fail_telemetry_derived__unified_metrics__v1_external.set_upstream( checks__fail_telemetry_derived__unified_metrics__v1 ) @@ -210,6 +217,20 @@ retries=0, ) + with TaskGroup( + "checks__warn_telemetry_derived__unified_metrics__v1_external", + ) as checks__warn_telemetry_derived__unified_metrics__v1_external: + ExternalTaskMarker( + task_id="kpi_forecasting__wait_for_unified_metrics", + external_dag_id="kpi_forecasting", + external_task_id="wait_for_unified_metrics", + execution_date="{{ (execution_date + 
macros.timedelta(seconds=3600)).isoformat() }}", + ) + + checks__warn_telemetry_derived__unified_metrics__v1_external.set_upstream( + checks__warn_telemetry_derived__unified_metrics__v1 + ) + telemetry_derived__rolling_cohorts__v1 = bigquery_etl_query( task_id="telemetry_derived__rolling_cohorts__v1", destination_table="rolling_cohorts_v1", @@ -257,6 +278,20 @@ depends_on_past=False, ) + with TaskGroup( + "telemetry_derived__unified_metrics__v1_external", + ) as telemetry_derived__unified_metrics__v1_external: + ExternalTaskMarker( + task_id="kpi_forecasting__wait_for_unified_metrics", + external_dag_id="kpi_forecasting", + external_task_id="wait_for_unified_metrics", + execution_date="{{ (execution_date + macros.timedelta(seconds=3600)).isoformat() }}", + ) + + telemetry_derived__unified_metrics__v1_external.set_upstream( + telemetry_derived__unified_metrics__v1 + ) + checks__fail_telemetry_derived__unified_metrics__v1.set_upstream( telemetry_derived__unified_metrics__v1 ) Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql /tmp/workspace/generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql 2024-06-04 19:47:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql 2024-06-04 19:47:23.000000000 +0000 @@ -1,9 +1,4 @@ SELECT - *, - -- add some additional fields to test schema changes - CAST(NULL AS STRING) AS additional_field_1, - CAST(NULL AS INT64) AS additional_field_2, - CAST(NULL AS STRING) AS additional_field_3, - 
CAST(NULL AS STRING) AS additional_field_4, + * FROM `moz-fx-data-shared-prod.fenix_derived.firefox_android_clients_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 19:49:17.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 19:47:23.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 19:47:23.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 19:49:12.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: report_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - 
workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 19:47:23.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @report_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= 
'2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@report_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@report_date, INTERVAL 29 DAY) --29 days after report date (15 days after DS) +), +--STEP 3: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. 
+ SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @report_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 
00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 19:47:23.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml 2024-06-04 19:48:46.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml 2024-06-04 19:49:14.000000000 +0000 @@ -19,6 +19,10 @@ owner1: loines scheduling: dag_name: bqetl_unified + external_downstream_tasks: + - task_id: wait_for_unified_metrics + dag_name: kpi_forecasting + execution_delta: 1h bigquery: time_partitioning: type: day ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Fix comment"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 20:21:25.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 20:22:52.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 20:22:53.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
20:21:25.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 20:22:49.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_unified.py /tmp/workspace/generated-sql/dags/bqetl_unified.py --- /tmp/workspace/main-generated-sql/dags/bqetl_unified.py 2024-06-04 20:21:25.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_unified.py 2024-06-04 20:22:50.000000000 +0000 @@ -188,6 +188,13 @@ execution_date="{{ (execution_date - macros.timedelta(seconds=10800)).isoformat() }}", ) + ExternalTaskMarker( + task_id="kpi_forecasting__wait_for_unified_metrics", + external_dag_id="kpi_forecasting", + external_task_id="wait_for_unified_metrics", + execution_date="{{ (execution_date + macros.timedelta(seconds=3600)).isoformat() }}", + ) + checks__fail_telemetry_derived__unified_metrics__v1_external.set_upstream( checks__fail_telemetry_derived__unified_metrics__v1 ) @@ -210,6 +217,20 @@ retries=0, ) + with TaskGroup( + "checks__warn_telemetry_derived__unified_metrics__v1_external", + ) as checks__warn_telemetry_derived__unified_metrics__v1_external: + ExternalTaskMarker( + task_id="kpi_forecasting__wait_for_unified_metrics", + external_dag_id="kpi_forecasting", + external_task_id="wait_for_unified_metrics", + execution_date="{{ (execution_date + 
macros.timedelta(seconds=3600)).isoformat() }}", + ) + + checks__warn_telemetry_derived__unified_metrics__v1_external.set_upstream( + checks__warn_telemetry_derived__unified_metrics__v1 + ) + telemetry_derived__rolling_cohorts__v1 = bigquery_etl_query( task_id="telemetry_derived__rolling_cohorts__v1", destination_table="rolling_cohorts_v1", @@ -257,6 +278,20 @@ depends_on_past=False, ) + with TaskGroup( + "telemetry_derived__unified_metrics__v1_external", + ) as telemetry_derived__unified_metrics__v1_external: + ExternalTaskMarker( + task_id="kpi_forecasting__wait_for_unified_metrics", + external_dag_id="kpi_forecasting", + external_task_id="wait_for_unified_metrics", + execution_date="{{ (execution_date + macros.timedelta(seconds=3600)).isoformat() }}", + ) + + telemetry_derived__unified_metrics__v1_external.set_upstream( + telemetry_derived__unified_metrics__v1 + ) + checks__fail_telemetry_derived__unified_metrics__v1.set_upstream( telemetry_derived__unified_metrics__v1 ) Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql /tmp/workspace/generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql 2024-06-04 20:17:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/data-observability-dev/fenix_derived/firefox_android_clients_v1/query.sql 2024-06-04 20:17:53.000000000 +0000 @@ -1,9 +1,4 @@ SELECT - *, - -- add some additional fields to test schema changes - CAST(NULL AS STRING) AS additional_field_1, - CAST(NULL AS INT64) AS additional_field_2, - CAST(NULL AS STRING) AS additional_field_3, - 
CAST(NULL AS STRING) AS additional_field_4, + * FROM `moz-fx-data-shared-prod.fenix_derived.firefox_android_clients_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 20:19:51.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 20:17:53.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 20:17:53.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 20:19:51.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: report_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - 
workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 20:17:53.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @report_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= 
'2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@report_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@report_date, INTERVAL 29 DAY) --15 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, --total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. 
+ SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @report_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 
00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 20:17:53.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml 2024-06-04 20:19:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/unified_metrics_v1/metadata.yaml 2024-06-04 20:19:57.000000000 +0000 @@ -19,6 +19,10 @@ owner1: loines scheduling: dag_name: bqetl_unified + external_downstream_tasks: + - task_id: wait_for_unified_metrics + dag_name: kpi_forecasting + execution_delta: 1h bigquery: time_partitioning: type: day ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Merge branch 'main' into dsktp-conv-evnts"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 20:25:08.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 20:26:22.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 20:26:23.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
20:25:08.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 20:26:19.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 20:23:21.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 20:21:30.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 20:21:30.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 20:23:21.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: report_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 20:21:30.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @report_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@report_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@report_date, INTERVAL 29 DAY) --15 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, 
--total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @report_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 20:21:30.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Update report date"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 20:54:52.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 20:56:05.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 20:56:06.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
20:54:52.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 20:56:02.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 20:53:06.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 20:51:07.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 20:51:07.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 20:53:06.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: report_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 20:51:07.000000000 +0000 @@ -0,0 +1,131 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @report_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@report_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@report_date, INTERVAL 29 DAY) --15 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, 
--total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + @submission_date AS report_date, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 20:51:07.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "GROWTH-143 partition on first seen date since that's really the cohort"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 21:02:08.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 21:03:44.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 21:03:45.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
21:02:08.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 21:03:41.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 21:00:36.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 20:58:42.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 20:58:42.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 21:00:36.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 20:58:42.000000000 +0000 @@ -0,0 +1,130 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @report_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@report_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@report_date, INTERVAL 29 DAY) --15 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, 
--total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 20:58:42.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "set report date as submission date"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 21:06:21.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 21:07:42.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 21:07:44.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
21:06:21.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 21:07:40.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 21:04:55.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 21:03:06.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 21:03:06.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 21:04:55.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 21:03:06.000000000 +0000 @@ -0,0 +1,130 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @report_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@report_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@report_date, INTERVAL 29 DAY) --15 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, 
--total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + @submission_date AS report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 21:03:06.000000000 +0000 @@ -0,0 +1,61 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 
Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff

dataops-ci-bot commented 4 months ago

Integration report for "Add report date to schema"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 21:11:38.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-06-04 21:13:02.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_mozilla_org_derived__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_mozilla_org_derived", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-06-04 21:13:03.000000000 +0000 @@ -0,0 +1,116 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from 
utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 4, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + ) + + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + execution_delta=datetime.timedelta(seconds=36000), + check_existence=True, + mode="reschedule", + allowed_states=ALLOWED_STATES, + 
failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1 = bigquery_dq_check( + task_id="checks__warn_google_ads_derived__conversion_event_categorization__v1", + source_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + is_dq_check_fail=False, + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + retries=0, + ) + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table='conversion_event_categorization_v1${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + parameters=["report_date:DATE:{{macros.ds_add(ds, -14)}}"] + + ["submission_date:DATE:{{ds}}"], + ) + + checks__warn_google_ads_derived__conversion_event_categorization__v1.set_upstream( + google_ads_derived__conversion_event_categorization__v1 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 + ) + + google_ads_derived__conversion_event_categorization__v1.set_upstream( + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py --- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py 2024-06-04 
21:11:38.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py 2024-06-04 21:13:00.000000000 +0000 @@ -144,6 +144,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_search_dashboard__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", external_dag_id="bqetl_search_dashboard", external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-06-04 21:10:17.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-06-04 21:08:11.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/checks.sql 2024-06-04 21:08:11.000000000 +0000 @@ -0,0 +1,2 @@ +#warn +{{ is_unique(["client_id"]) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-06-04 21:10:21.000000000 +0000 @@ -0,0 +1,32 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + incremental: true + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: report_date + date_partition_offset: -14 + parameters: + - submission_date:DATE:{{ds}} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-06-04 21:08:11.000000000 +0000 @@ -0,0 +1,130 @@ +--STEP 1: Get clients with a first seen date = submission date - 14 days +--Note: Min cohort date is 2023-11-01 so backfilling will return nothing before then +--Note: Max cohort date cannot be more than 7 days ago (to ensure we always have at least 7 days of data) +WITH clients_first_seen_14_days_ago AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + 
attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date = @report_date --this is 14 days before {{ds}} + AND first_seen_date + BETWEEN '2023-11-01' + AND DATE_SUB(CURRENT_DATE, INTERVAL 8 DAY) +), +--Step 2: Get only the columns we need from clients last seen, for only the small window of time we need +clients_last_seen_raw AS ( + SELECT + cls.client_id, + cls.first_seen_date, + cls.country, + cls.submission_date, + cls.days_since_seen, + cls.active_hours_sum, + cls.days_visited_1_uri_bits, + cls.days_interacted_bits, + cls.search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` cls + JOIN + clients_first_seen_14_days_ago clients + ON cls.client_id = clients.client_id + WHERE + cls.submission_date >= '2023-11-01' --first cohort date + AND cls.submission_date + BETWEEN cls.first_seen_date + AND DATE_ADD(cls.first_seen_date, INTERVAL 6 DAY) --get first 7 days from their first main ping + --to process less data, we only check for pings between @submission date - 15 days and submission date + 15 days for each date this runs + AND cls.submission_date + BETWEEN DATE_SUB(@report_date, INTERVAL 1 DAY) --15 days before DS + AND DATE_ADD(@report_date, INTERVAL 29 DAY) --15 days after DS +), +--STEP 2: For every client, get the first 7 days worth of main pings sent after their first main ping +client_activity_first_7_days AS ( + SELECT + client_id, + ANY_VALUE( + first_seen_date + ) AS first_seen_date, --date we got first main ping (potentially different than above first seen date) + ANY_VALUE( + CASE + WHEN first_seen_date = submission_date + THEN country + END + ) AS country, --any country from their first day in clients_last_seen + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, 
--total # of days of activity during their first 7 days of main pings + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + clients_last_seen_raw + GROUP BY + client_id +), +combined AS ( + SELECT + cfs.client_id, + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping_in_first_7_days, + COALESCE( + cls.country, + cfs.country + ) AS country, -- Conversion events & LTV are based on their first observed country in CLS, use that country if its available + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + clients_first_seen_14_days_ago AS cfs + LEFT JOIN + client_activity_first_7_days AS cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + @submission_date AS report_date, + sent_main_ping_in_first_7_days, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + IF(search_with_ads_count_all > 0 AND dou >= 5, TRUE, FALSE) AS event_1, + IF(search_with_ads_count_all > 0 AND dou >= 3, TRUE, FALSE) AS event_2, + IF(active_hours_sum >= 0.4 AND dou >= 3, TRUE, FALSE) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-06-04 21:08:11.000000000 +0000 @@ -0,0 +1,65 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping_in_first_7_days + type: BOOLEAN + description: Sent Main Ping In First 7 Days After First Seen Date Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: FLOAT + description: Active Hours Sum +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator - 5 or more days of use and 1 or more search with ads (strictest event) +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator - 3 or more days of use and 1 or more search with ads (medium event) +- 
mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator - 3 or more days of use and 0.4 or more active hours (most lenient event) ```

Link to full diff