mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
241 stars 98 forks source link

Gclid Conversions - update time offset #5814

Closed kwindau closed 1 week ago

kwindau commented 1 week ago

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 1 week ago

Integration report for "Fix metadata file"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 13:08:57.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 13:09:42.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] + + ["activity_date:DATE:{{ds}}"], retries=0, ) @@ -384,7 +385,7 @@ date_partition_parameter=None, depends_on_past=False, parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + + ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 13:04:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 13:04:46.000000000 +0000 @@ -14,11 +14,11 @@ scheduling: dag_name: bqetl_google_analytics_derived_ga4 date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + parameters: ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "Fix metadata file"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 13:15:55.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 13:17:20.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] + + ["activity_date:DATE:{{ds}}"], retries=0, ) @@ -384,7 +385,7 @@ date_partition_parameter=None, depends_on_past=False, parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + + ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/ga_desktop_conversions_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/ga_desktop_conversions_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/ga_desktop_conversions_v1/metadata.yaml 2024-06-20 13:12:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/ga_desktop_conversions_v1/metadata.yaml 2024-06-20 13:14:28.000000000 +0000 @@ -5,7 +5,8 @@ - kwindau@mozilla.com labels: incremental: true - owner1: kwindau@mozilla.com + owner1: kwindau + dag: bqetl_census_feed scheduling: dag_name: bqetl_census_feed date_partition_parameter: activity_date @@ -22,7 +23,8 @@ clustering: fields: - conversion_name -references: {} workgroup_access: - role: roles/bigquery.dataViewer - members: [workgroup:dataops-managed/external-census] + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 13:12:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 13:12:40.000000000 +0000 @@ -14,11 +14,11 @@ scheduling: dag_name: bqetl_google_analytics_derived_ga4 date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + parameters: ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "Merge branch 'main' into fix-gclid-conv"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 13:15:54.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 13:17:46.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] + + ["activity_date:DATE:{{ds}}"], retries=0, ) @@ -384,7 +385,7 @@ date_partition_parameter=None, depends_on_past=False, parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + + ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 13:12:39.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 13:12:55.000000000 +0000 @@ -14,11 +14,11 @@ scheduling: dag_name: bqetl_google_analytics_derived_ga4 date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + parameters: ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "Merge branch 'main' into fix-gclid-conv"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 15:42:20.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 15:43:34.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] + + ["activity_date:DATE:{{ds}}"], retries=0, ) @@ -384,7 +385,7 @@ date_partition_parameter=None, depends_on_past=False, parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + + ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 15:38:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 15:38:36.000000000 +0000 @@ -14,11 +14,11 @@ scheduling: dag_name: bqetl_google_analytics_derived_ga4 date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + parameters: ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "Merge branch 'main' into fix-gclid-conv"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:06:28.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:08:02.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] + + ["activity_date:DATE:{{ds}}"], retries=0, ) @@ -384,7 +385,7 @@ date_partition_parameter=None, depends_on_past=False, parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + + ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:02:52.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:03:18.000000000 +0000 @@ -14,11 +14,11 @@ scheduling: dag_name: bqetl_google_analytics_derived_ga4 date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + parameters: ["conversion_window:INT64:30", "activity_date:DATE:{{ds}}"] bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "Merge branch 'main' into fix-gclid-conv"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:28:10.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:29:45.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"] + + ["submission_date:DATE:{{ds}}"], retries=0, ) @@ -372,7 +373,7 @@ mozilla_org_derived__gclid_conversions__v2 = bigquery_etl_query( task_id="mozilla_org_derived__gclid_conversions__v2", - destination_table='gclid_conversions_v2${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + destination_table="gclid_conversions_v2", dataset_id="mozilla_org_derived", project_id="moz-fx-data-shared-prod", owner="mhirose@mozilla.com", @@ -381,10 +382,9 @@ "mhirose@mozilla.com", "telemetry-alerts@mozilla.com", ], - date_partition_parameter=None, + date_partition_parameter="submission_date", depends_on_past=False, - parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + parameters=["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:24:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:24:39.000000000 +0000 @@ -13,12 +13,13 @@ owner1: mhirose@mozilla.com scheduling: dag_name: bqetl_google_analytics_derived_ga4 - date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + date_partition_parameter: submission_date + parameters: ["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"] + date_partition_offset: 0 bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:24:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:24:39.000000000 +0000 @@ -10,9 +10,9 @@ CROSS JOIN UNNEST(all_reported_stub_session_ids) AS stub_session_id WHERE - session_date >= DATE_SUB(@activity_date, INTERVAL @conversion_window DAY) + session_date >= DATE_SUB(@submission_date, INTERVAL @conversion_window DAY) -- Next line is needed for backfilling purposes - AND session_date <= @activity_date + AND session_date <= @submission_date AND gclid IS NOT NULL ), --Step 2: Get all the download tokens associated with a known GA client ID & stub session ID @@ -50,7 +50,7 @@ FROM `moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6` WHERE - submission_date = @activity_date + submission_date = @submission_date UNION ALL SELECT client_id AS telemetry_client_id, @@ -65,8 +65,8 @@ `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` WHERE (event_1 IS TRUE OR event_2 IS TRUE OR event_3 IS TRUE) - AND report_date = @activity_date - AND first_seen_date < @activity_date --needed since this is a required partition filter + AND report_date = @submission_date + AND first_seen_date < @submission_date --needed since this is a required partition filter ), telemetry_id_to_activity AS ( SELECT ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "GROWTH-143 - fix DAG auto generation for gclid conversions v2 task"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:29:34.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:31:05.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"] + + ["submission_date:DATE:{{ds}}"], retries=0, ) @@ -372,7 +373,7 @@ mozilla_org_derived__gclid_conversions__v2 = bigquery_etl_query( task_id="mozilla_org_derived__gclid_conversions__v2", - destination_table='gclid_conversions_v2${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + destination_table="gclid_conversions_v2", dataset_id="mozilla_org_derived", project_id="moz-fx-data-shared-prod", owner="mhirose@mozilla.com", @@ -381,10 +382,9 @@ "mhirose@mozilla.com", "telemetry-alerts@mozilla.com", ], - date_partition_parameter=None, + date_partition_parameter="submission_date", depends_on_past=False, - parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + parameters=["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 2024-06-20 17:25:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 2024-06-20 17:26:04.000000000 +0000 @@ -210,7 +210,7 @@ project = args.project dataset = args.dataset - table_name = "app_acquisitions_v1" + table_name = "app_acquisitions" date = args.date client_id = MS_CLIENT_ID diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 2024-06-20 17:25:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 2024-06-20 17:26:04.000000000 +0000 @@ -201,7 +201,7 @@ project = args.project dataset = args.dataset - table_name = "app_conversions_v1" + table_name = "app_conversions" date = args.date client_id = MS_CLIENT_ID diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py 2024-06-20 17:25:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py 2024-06-20 17:26:04.000000000 +0000 @@ -187,7 +187,7 @@ project = args.project dataset = args.dataset - table_name = "app_installs_v1" + table_name = "app_installs" date = args.date client_id = MS_CLIENT_ID diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:25:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:26:04.000000000 +0000 @@ -13,12 +13,13 @@ owner1: mhirose@mozilla.com scheduling: dag_name: bqetl_google_analytics_derived_ga4 - date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + date_partition_parameter: submission_date + parameters: ["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"] + date_partition_offset: 0 bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:25:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:26:04.000000000 +0000 @@ -10,9 +10,9 @@ CROSS JOIN UNNEST(all_reported_stub_session_ids) AS stub_session_id WHERE - session_date >= DATE_SUB(@activity_date, INTERVAL @conversion_window DAY) + session_date >= DATE_SUB(@submission_date, INTERVAL @conversion_window DAY) -- Next line is needed for backfilling purposes - AND session_date <= @activity_date + AND session_date <= @submission_date AND gclid IS NOT NULL ), --Step 2: Get all the download tokens associated with a known GA client ID & stub session ID @@ -50,7 +50,7 @@ FROM `moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6` WHERE - submission_date = @activity_date + submission_date = @submission_date UNION ALL SELECT client_id AS telemetry_client_id, @@ -65,8 +65,8 @@ `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` WHERE (event_1 IS TRUE OR event_2 IS TRUE OR event_3 IS TRUE) - AND report_date = @activity_date - AND first_seen_date < @activity_date --needed since this is a required partition filter + AND report_date = @submission_date + AND first_seen_date < @submission_date --needed since this is a required partition filter ), telemetry_id_to_activity AS ( SELECT ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "Merge branch 'main' into fix-gclid-conv"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:39:20.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:40:24.000000000 +0000 @@ -162,7 +162,8 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"] + + ["submission_date:DATE:{{ds}}"], retries=0, ) @@ -372,7 +373,7 @@ mozilla_org_derived__gclid_conversions__v2 = bigquery_etl_query( task_id="mozilla_org_derived__gclid_conversions__v2", - destination_table='gclid_conversions_v2${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + destination_table="gclid_conversions_v2", dataset_id="mozilla_org_derived", project_id="moz-fx-data-shared-prod", owner="mhirose@mozilla.com", @@ -381,10 +382,9 @@ "mhirose@mozilla.com", "telemetry-alerts@mozilla.com", ], - date_partition_parameter=None, + date_partition_parameter="submission_date", depends_on_past=False, - parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + parameters=["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:35:34.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:35:31.000000000 +0000 @@ -13,12 +13,13 @@ owner1: mhirose@mozilla.com scheduling: dag_name: bqetl_google_analytics_derived_ga4 - date_partition_parameter: activity_date - parameters: ["conversion_window:INT64:30"] + date_partition_parameter: submission_date + parameters: ["conversion_window:INT64:30", "submission_date:DATE:{{ds}}"] + date_partition_offset: 0 bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:35:34.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:35:31.000000000 +0000 @@ -10,9 +10,9 @@ CROSS JOIN UNNEST(all_reported_stub_session_ids) AS stub_session_id WHERE - session_date >= DATE_SUB(@activity_date, INTERVAL @conversion_window DAY) + session_date >= DATE_SUB(@submission_date, INTERVAL @conversion_window DAY) -- Next line is needed for backfilling purposes - AND session_date <= @activity_date + AND session_date <= @submission_date AND gclid IS NOT NULL ), --Step 2: Get all the download tokens associated with a known GA client ID & stub session ID @@ -50,7 +50,7 @@ FROM `moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6` WHERE - submission_date = @activity_date + submission_date = @submission_date UNION ALL SELECT client_id AS telemetry_client_id, @@ -65,8 +65,8 @@ `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` WHERE (event_1 IS TRUE OR event_2 IS TRUE OR event_3 IS TRUE) - AND report_date = @activity_date - AND first_seen_date < @activity_date --needed since this is a required partition filter + AND report_date = @submission_date + AND first_seen_date < @submission_date --needed since this is a required partition filter ), telemetry_id_to_activity AS ( SELECT ```

Link to full diff

dataops-ci-bot commented 1 week ago

Integration report for "Update metadata.yaml"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:38:55.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-06-20 17:40:19.000000000 +0000 @@ -162,7 +162,7 @@ "telemetry-alerts@mozilla.com", ], depends_on_past=False, - parameters=["conversion_window:INT64:30"] + ["activity_date:DATE:{{ds}}"], + parameters=["conversion_window:INT64:30"] + ["submission_date:DATE:{{ds}}"], retries=0, ) @@ -372,7 +372,7 @@ mozilla_org_derived__gclid_conversions__v2 = bigquery_etl_query( task_id="mozilla_org_derived__gclid_conversions__v2", - destination_table='gclid_conversions_v2${{ macros.ds_format(macros.ds_add(ds, -14), "%Y-%m-%d", "%Y%m%d") }}', + destination_table="gclid_conversions_v2", dataset_id="mozilla_org_derived", project_id="moz-fx-data-shared-prod", owner="mhirose@mozilla.com", @@ -381,10 +381,9 @@ "mhirose@mozilla.com", "telemetry-alerts@mozilla.com", ], - date_partition_parameter=None, + date_partition_parameter="submission_date", depends_on_past=False, - parameters=["activity_date:DATE:{{macros.ds_add(ds, -14)}}"] - + ["conversion_window:INT64:30"], + parameters=["conversion_window:INT64:30"], ) mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:35:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/metadata.yaml 2024-06-20 17:35:39.000000000 +0000 @@ -13,12 +13,13 @@ owner1: mhirose@mozilla.com scheduling: dag_name: bqetl_google_analytics_derived_ga4 - date_partition_parameter: activity_date + date_partition_parameter: submission_date parameters: ["conversion_window:INT64:30"] + date_partition_offset: 0 bigquery: time_partitioning: type: day - field: 'activity_date' + field: activity_date require_partition_filter: false expiration_days: null references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:35:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived/gclid_conversions_v2/query.sql 2024-06-20 17:35:39.000000000 +0000 @@ -10,9 +10,9 @@ CROSS JOIN UNNEST(all_reported_stub_session_ids) AS stub_session_id WHERE - session_date >= DATE_SUB(@activity_date, INTERVAL @conversion_window DAY) + session_date >= DATE_SUB(@submission_date, INTERVAL @conversion_window DAY) -- Next line is needed for backfilling purposes - AND session_date <= @activity_date + AND session_date <= @submission_date AND gclid IS NOT NULL ), --Step 2: Get all the download tokens associated with a known GA client ID & stub session ID @@ -50,7 +50,7 @@ FROM `moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6` WHERE - submission_date = @activity_date + submission_date = @submission_date UNION ALL SELECT client_id AS telemetry_client_id, @@ -65,8 +65,8 @@ `moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1` WHERE (event_1 IS TRUE OR event_2 IS TRUE OR event_3 IS TRUE) - AND report_date = @activity_date - AND first_seen_date < @activity_date --needed since this is a required partition filter + AND report_date = @submission_date + AND first_seen_date < @submission_date --needed since this is a required partition filter ), telemetry_id_to_activity AS ( SELECT ```

Link to full diff