mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
246 stars 98 forks source link

GROWTH-143 - Build dataset for supporting conversion event optimization #5667

Closed kwindau closed 1 month ago

kwindau commented 2 months ago

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 2 months ago

Integration report for "Merge branch 'main' into mktg-desktop-conv-evnt"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-05-28 17:30:46.000000000 +0000 @@ -0,0 +1,63 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 1, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + task_concurrency=1, + ) Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-05-28 17:27:18.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-05-28 17:24:59.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-05-28 17:27:18.000000000 +0000 @@ -0,0 +1,28 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: null +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-05-28 17:24:59.000000000 +0000 @@ -0,0 +1,113 @@ +DECLARE first_cohort_date DATE DEFAULT DATE(2024, 5, 1); + +DECLARE last_cohort_date DATE DEFAULT DATE_SUB( + CURRENT_DATE, + INTERVAL 8 DAY +); -- want to make sure the last cohort we are reporting has had their full first 7 days +WITH clients_first_seen AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date + BETWEEN first_cohort_date + AND last_cohort_date +), +clients_last_seen AS ( + SELECT + client_id, + -- note that this is the date we got each client's first main ping, which is potentially a different date from the first_seen_date above. + ANY_VALUE(first_seen_date) AS first_seen_date, + -- just the country value from their first main ping day + ANY_VALUE(CASE WHEN first_seen_date = submission_date THEN country END) AS country, + -- the date we would report on their conversion events (after we get data from their 7th day) + ANY_VALUE( + IF(submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY), submission_date, NULL) + ) AS report_date, + -- their first week days of use, taken from their 7th day data + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` + WHERE + submission_date >= first_cohort_date + AND submission_date + BETWEEN first_seen_date + AND DATE_ADD(first_seen_date, INTERVAL 6 DAY) + GROUP BY + client_id +), +combined AS ( + SELECT + client_id, + -- we should use their actual first date as their cohort date + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + cls.report_date, + -- not a strictly necessary field, used to see how many clients actually send a main ping + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping, + -- because the conversion events and ltv are based on their first observed country in CLS, use that country if its available. + COALESCE(cls.country, cfs.country) AS country, + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + cfs + LEFT JOIN + cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + --"strictest" event + IF(report_date IS NOT NULL, (search_with_ads_count_all > 0) AND (dou >= 5), NULL) AS event_1, + -- "medium" event + IF(report_date IS NOT NULL, (search_with_ads_count_all > 0) AND (dou >= 3), NULL) AS event_2, + -- "most_lenient" event + IF(report_date IS NOT NULL, (active_hours_sum >= 0.4) AND (dou >= 3), NULL) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-05-28 17:24:59.000000000 +0000 @@ -0,0 +1,69 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping + type: BOOLEAN + description: Sent Main Ping Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: NUMERIC + description: Active Hours Sum +- mode: NULLABLE + name: account_name + type: STRING + description: The name of the Google Ads account, e.g. Mozilla Firefox. +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator ```

Link to full diff

dataops-ci-bot commented 2 months ago

Integration report for "Merge branch 'main' into mktg-desktop-conv-evnt"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/dags/: bqetl_desktop_conv_evnt_categorization.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-05-28 18:03:15.000000000 +0000 @@ -0,0 +1,63 @@ +# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py + +from airflow import DAG +from airflow.sensors.external_task import ExternalTaskMarker +from airflow.sensors.external_task import ExternalTaskSensor +from airflow.utils.task_group import TaskGroup +import datetime +from operators.gcp_container_operator import GKEPodOperator +from utils.constants import ALLOWED_STATES, FAILED_STATES +from utils.gcp import bigquery_etl_query, bigquery_dq_check + +docs = """ +### bqetl_desktop_conv_evnt_categorization + +Built from bigquery-etl repo, [`dags/bqetl_desktop_conv_evnt_categorization.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py) + +#### Description + +Loads the desktop conversion event tables +#### Owner + +kwindau@mozilla.com + +#### Tags + +* impact/tier_2 +* repo/bigquery-etl +""" + + +default_args = { + "owner": "kwindau@mozilla.com", + "start_date": datetime.datetime(2024, 6, 1, 0, 0), + "end_date": None, + "email": ["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + "depends_on_past": False, + "retry_delay": datetime.timedelta(seconds=1800), + "email_on_failure": True, + "email_on_retry": False, + "retries": 2, +} + +tags = ["impact/tier_2", "repo/bigquery-etl"] + +with DAG( + "bqetl_desktop_conv_evnt_categorization", + default_args=default_args, + schedule_interval="0 12 * * *", + doc_md=docs, + tags=tags, +) as dag: + + google_ads_derived__conversion_event_categorization__v1 = bigquery_etl_query( + task_id="google_ads_derived__conversion_event_categorization__v1", + destination_table="conversion_event_categorization_v1", + dataset_id="google_ads_derived", + project_id="moz-fx-data-shared-prod", + owner="kwindau@mozilla.com", + email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], + date_partition_parameter=None, + depends_on_past=False, + task_concurrency=1, + ) Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads: conversion_event_categorization Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived: conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/metadata.yaml 2024-05-28 18:00:21.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Conversion Event Categorization +description: |- + Please provide a description for the query +owners: [] +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads/conversion_event_categorization/view.sql 2024-05-28 17:58:32.000000000 +0000 @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_ads.conversion_event_categorization` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/metadata.yaml 2024-05-28 18:00:22.000000000 +0000 @@ -0,0 +1,28 @@ +friendly_name: Conversion Event Categorization +description: |- + Classifies conversion events +owners: +- kwindau@mozilla.com +labels: + owner1: kwindau + dag: bqetl_desktop_conv_evnt_categorization +scheduling: + dag_name: bqetl_desktop_conv_evnt_categorization + depends_on_past: false + date_partition_parameter: null +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - client_id + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/query.sql 2024-05-28 17:58:32.000000000 +0000 @@ -0,0 +1,113 @@ +DECLARE first_cohort_date DATE DEFAULT DATE(2024, 5, 1); + +DECLARE last_cohort_date DATE DEFAULT DATE_SUB( + CURRENT_DATE, + INTERVAL 8 DAY +); -- want to make sure the last cohort we are reporting has had their full first 7 days +WITH clients_first_seen AS ( + SELECT + client_id, + first_seen_date, + country, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source + FROM + `moz-fx-data-shared-prod.telemetry.clients_first_seen` --contains all new clients, including those that never sent a main ping + WHERE + first_seen_date + BETWEEN first_cohort_date + AND last_cohort_date +), +clients_last_seen AS ( + SELECT + client_id, + -- note that this is the date we got each client's first main ping, which is potentially a different date from the first_seen_date above. + ANY_VALUE(first_seen_date) AS first_seen_date, + -- just the country value from their first main ping day + ANY_VALUE(CASE WHEN first_seen_date = submission_date THEN country END) AS country, + -- the date we would report on their conversion events (after we get data from their 7th day) + ANY_VALUE( + IF(submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY), submission_date, NULL) + ) AS report_date, + -- their first week days of use, taken from their 7th day data + ANY_VALUE( + CASE + WHEN submission_date = DATE_ADD(first_seen_date, INTERVAL 6 DAY) + THEN BIT_COUNT(days_visited_1_uri_bits & days_interacted_bits) + END + ) AS dou, + -- if a client doesn't send a ping on `submission_date` their last active day's value will be carried forward + -- so we only take measurements from days that they send a ping. + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(active_hours_sum, 0) + ELSE 0 + END + ) AS active_hours_sum, + SUM( + CASE + WHEN days_since_seen = 0 + THEN COALESCE(search_with_ads_count_all, 0) + ELSE 0 + END + ) AS search_with_ads_count_all + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` + WHERE + submission_date >= first_cohort_date + AND submission_date + BETWEEN first_seen_date + AND DATE_ADD(first_seen_date, INTERVAL 6 DAY) + GROUP BY + client_id +), +combined AS ( + SELECT + client_id, + -- we should use their actual first date as their cohort date + cfs.first_seen_date, + cfs.attribution_campaign, + cfs.attribution_content, + cfs.attribution_dltoken, + cfs.attribution_medium, + cfs.attribution_source, + cls.report_date, + -- not a strictly necessary field, used to see how many clients actually send a main ping + IF(cls.first_seen_date IS NOT NULL, TRUE, FALSE) AS sent_main_ping, + -- because the conversion events and ltv are based on their first observed country in CLS, use that country if its available. + COALESCE(cls.country, cfs.country) AS country, + COALESCE(dou, 0) AS dou, + COALESCE(active_hours_sum, 0) AS active_hours_sum, + COALESCE(search_with_ads_count_all, 0) AS search_with_ads_count_all + FROM + cfs + LEFT JOIN + cls + USING (client_id) +) +SELECT + client_id, + first_seen_date, + attribution_campaign, + attribution_content, + attribution_dltoken, + attribution_medium, + attribution_source, + report_date, + sent_main_ping, + country, + dou, + active_hours_sum, + search_with_ads_count_all, + --"strictest" event + IF(report_date IS NOT NULL, (search_with_ads_count_all > 0) AND (dou >= 5), NULL) AS event_1, + -- "medium" event + IF(report_date IS NOT NULL, (search_with_ads_count_all > 0) AND (dou >= 3), NULL) AS event_2, + -- "most_lenient" event + IF(report_date IS NOT NULL, (active_hours_sum >= 0.4) AND (dou >= 3), NULL) AS event_3, +FROM + combined diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/conversion_event_categorization_v1/schema.yaml 2024-05-28 17:58:32.000000000 +0000 @@ -0,0 +1,69 @@ +fields: +- mode: NULLABLE + name: client_id + type: STRING + description: Client ID +- mode: NULLABLE + name: first_seen_date + type: DATE + description: First Seen Date +- mode: NULLABLE + name: attribution_campaign + type: STRING + description: Attribution Campaign +- mode: NULLABLE + name: attribution_content + type: STRING + description: Attribution Content +- mode: NULLABLE + name: attribution_dltoken + type: STRING + description: Attribution Download Token +- mode: NULLABLE + name: attribution_medium + type: STRING + description: Attribution Medium +- mode: NULLABLE + name: attribution_source + type: STRING + description: Attribution Source +- mode: NULLABLE + name: report_date + type: DATE + description: Report Date +- mode: NULLABLE + name: sent_main_ping + type: BOOLEAN + description: Sent Main Ping Indicator +- mode: NULLABLE + name: country + type: STRING + description: Country +- mode: NULLABLE + name: dou + type: INT64 + description: DOU +- mode: NULLABLE + name: active_hours_sum + type: NUMERIC + description: Active Hours Sum +- mode: NULLABLE + name: account_name + type: STRING + description: The name of the Google Ads account, e.g. Mozilla Firefox. +- mode: NULLABLE + name: search_with_ads_count_all + type: INTEGER + description: Search With Ads Count All +- mode: NULLABLE + name: event_1 + type: BOOLEAN + description: Event 1 Indicator +- mode: NULLABLE + name: event_2 + type: BOOLEAN + description: Event 2 Indicator +- mode: NULLABLE + name: event_3 + type: BOOLEAN + description: Event 3 Indicator ```

Link to full diff

kwindau commented 1 month ago

Closing & opening a new PR since now has conflicts