mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
246 stars 98 forks source link

removing upstream dependencies for now #5688

Closed chelseybeck closed 1 month ago

chelseybeck commented 1 month ago

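The Braze DAG (`bqetl_braze`) is now scheduled to run three times a day, but the DAGs it depends on upstream are not. We can get by with the upstream tables being refreshed on a daily cadence, because the rest of the data comes from an upstream source outside of bqetl. This PR therefore removes the upstream dependencies for now.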

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into DENG-3763-fix-dependencies"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_braze.py /tmp/workspace/generated-sql/dags/bqetl_braze.py --- /tmp/workspace/main-generated-sql/dags/bqetl_braze.py 2024-05-30 18:51:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_braze.py 2024-05-30 18:52:23.000000000 +0000 @@ -55,28 +55,6 @@ tags=tags, ) as dag: - wait_for_subscription_platform_derived__logical_subscriptions_history__v1 = ExternalTaskSensor( - task_id="wait_for_subscription_platform_derived__logical_subscriptions_history__v1", - external_dag_id="bqetl_subplat", - external_task_id="subscription_platform_derived__logical_subscriptions_history__v1", - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - external_dag_id="bqetl_marketing_suppression_list", - external_task_id="checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - braze_derived__newsletters__v1 = bigquery_etl_query( task_id="braze_derived__newsletters__v1", destination_table="newsletters_v1", @@ -491,10 +469,6 @@ braze_derived__newsletters__v1.set_upstream(checks__fail_braze_derived__users__v1) - braze_derived__products__v1.set_upstream( - wait_for_subscription_platform_derived__logical_subscriptions_history__v1 - ) - braze_derived__subscriptions__v1.set_upstream( checks__fail_braze_derived__subscriptions_map__v1 ) @@ -517,10 +491,6 @@ checks__fail_braze_derived__waitlists__v1 ) - braze_derived__users__v1.set_upstream( - 
wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1 - ) - braze_derived__waitlists__v1.set_upstream(checks__fail_braze_derived__users__v1) braze_external__changed_firefox_subscriptions_sync__v1.set_upstream( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_marketing_suppression_list.py /tmp/workspace/generated-sql/dags/bqetl_marketing_suppression_list.py --- /tmp/workspace/main-generated-sql/dags/bqetl_marketing_suppression_list.py 2024-05-30 18:51:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_marketing_suppression_list.py 2024-05-30 18:52:23.000000000 +0000 @@ -118,19 +118,6 @@ retries=0, ) - with TaskGroup( - "checks__fail_marketing_suppression_list_derived__main_suppression_list__v1_external", - ) as checks__fail_marketing_suppression_list_derived__main_suppression_list__v1_external: - ExternalTaskMarker( - task_id="bqetl_braze__wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - external_dag_id="bqetl_braze", - external_task_id="wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - ) - - checks__fail_marketing_suppression_list_derived__main_suppression_list__v1_external.set_upstream( - checks__fail_marketing_suppression_list_derived__main_suppression_list__v1 - ) - checks__warn_marketing_suppression_list_derived__main_suppression_list__v1 = bigquery_dq_check( task_id="checks__warn_marketing_suppression_list_derived__main_suppression_list__v1", source_table="main_suppression_list_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_subplat.py /tmp/workspace/generated-sql/dags/bqetl_subplat.py --- /tmp/workspace/main-generated-sql/dags/bqetl_subplat.py 2024-05-30 18:51:00.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_subplat.py 2024-05-30 18:52:20.000000000 +0000 @@ -1036,19 +1036,6 @@ ) ) - with TaskGroup( - 
"subscription_platform_derived__logical_subscriptions_history__v1_external", - ) as subscription_platform_derived__logical_subscriptions_history__v1_external: - ExternalTaskMarker( - task_id="bqetl_braze__wait_for_subscription_platform_derived__logical_subscriptions_history__v1", - external_dag_id="bqetl_braze", - external_task_id="wait_for_subscription_platform_derived__logical_subscriptions_history__v1", - ) - - subscription_platform_derived__logical_subscriptions_history__v1_external.set_upstream( - subscription_platform_derived__logical_subscriptions_history__v1 - ) - subscription_platform_derived__monthly_active_logical_subscriptions__v1 = bigquery_etl_query( task_id="subscription_platform_derived__monthly_active_logical_subscriptions__v1", destination_table="monthly_active_logical_subscriptions_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml 2024-05-30 18:49:04.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml 2024-05-30 18:49:29.000000000 +0000 @@ -12,10 +12,7 @@ scheduling: dag_name: bqetl_braze date_partition_parameter: null - referenced_tables: - - - moz-fx-data-shared-prod - - subscription_platform_derived - - logical_subscriptions_history_v1 + referenced_tables: [] bigquery: time_partitioning: null range_partitioning: null diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml 2024-05-30 18:49:04.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml 2024-05-30 18:49:29.000000000 +0000 @@ -13,10 +13,7 @@ scheduling: dag_name: bqetl_braze date_partition_parameter: null - referenced_tables: - - - moz-fx-data-shared-prod - - marketing_suppression_list_derived - - main_suppression_list_v1 + referenced_tables: [] bigquery: time_partitioning: null range_partitioning: null ```

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into DENG-3763-fix-dependencies"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_braze.py /tmp/workspace/generated-sql/dags/bqetl_braze.py --- /tmp/workspace/main-generated-sql/dags/bqetl_braze.py 2024-05-30 23:21:37.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_braze.py 2024-05-30 23:22:44.000000000 +0000 @@ -55,28 +55,6 @@ tags=tags, ) as dag: - wait_for_subscription_platform_derived__logical_subscriptions_history__v1 = ExternalTaskSensor( - task_id="wait_for_subscription_platform_derived__logical_subscriptions_history__v1", - external_dag_id="bqetl_subplat", - external_task_id="subscription_platform_derived__logical_subscriptions_history__v1", - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - external_dag_id="bqetl_marketing_suppression_list", - external_task_id="checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - braze_derived__newsletters__v1 = bigquery_etl_query( task_id="braze_derived__newsletters__v1", destination_table="newsletters_v1", @@ -491,10 +469,6 @@ braze_derived__newsletters__v1.set_upstream(checks__fail_braze_derived__users__v1) - braze_derived__products__v1.set_upstream( - wait_for_subscription_platform_derived__logical_subscriptions_history__v1 - ) - braze_derived__subscriptions__v1.set_upstream( checks__fail_braze_derived__subscriptions_map__v1 ) @@ -517,10 +491,6 @@ checks__fail_braze_derived__waitlists__v1 ) - braze_derived__users__v1.set_upstream( - 
wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1 - ) - braze_derived__waitlists__v1.set_upstream(checks__fail_braze_derived__users__v1) braze_external__changed_firefox_subscriptions_sync__v1.set_upstream( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_marketing_suppression_list.py /tmp/workspace/generated-sql/dags/bqetl_marketing_suppression_list.py --- /tmp/workspace/main-generated-sql/dags/bqetl_marketing_suppression_list.py 2024-05-30 23:21:37.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_marketing_suppression_list.py 2024-05-30 23:22:45.000000000 +0000 @@ -118,19 +118,6 @@ retries=0, ) - with TaskGroup( - "checks__fail_marketing_suppression_list_derived__main_suppression_list__v1_external", - ) as checks__fail_marketing_suppression_list_derived__main_suppression_list__v1_external: - ExternalTaskMarker( - task_id="bqetl_braze__wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - external_dag_id="bqetl_braze", - external_task_id="wait_for_checks__fail_marketing_suppression_list_derived__main_suppression_list__v1", - ) - - checks__fail_marketing_suppression_list_derived__main_suppression_list__v1_external.set_upstream( - checks__fail_marketing_suppression_list_derived__main_suppression_list__v1 - ) - checks__warn_marketing_suppression_list_derived__main_suppression_list__v1 = bigquery_dq_check( task_id="checks__warn_marketing_suppression_list_derived__main_suppression_list__v1", source_table="main_suppression_list_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_subplat.py /tmp/workspace/generated-sql/dags/bqetl_subplat.py --- /tmp/workspace/main-generated-sql/dags/bqetl_subplat.py 2024-05-30 23:21:37.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_subplat.py 2024-05-30 23:22:41.000000000 +0000 @@ -1036,19 +1036,6 @@ ) ) - with TaskGroup( - 
"subscription_platform_derived__logical_subscriptions_history__v1_external", - ) as subscription_platform_derived__logical_subscriptions_history__v1_external: - ExternalTaskMarker( - task_id="bqetl_braze__wait_for_subscription_platform_derived__logical_subscriptions_history__v1", - external_dag_id="bqetl_braze", - external_task_id="wait_for_subscription_platform_derived__logical_subscriptions_history__v1", - ) - - subscription_platform_derived__logical_subscriptions_history__v1_external.set_upstream( - subscription_platform_derived__logical_subscriptions_history__v1 - ) - subscription_platform_derived__monthly_active_logical_subscriptions__v1 = bigquery_etl_query( task_id="subscription_platform_derived__monthly_active_logical_subscriptions__v1", destination_table="monthly_active_logical_subscriptions_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml 2024-05-30 23:19:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/products_v1/metadata.yaml 2024-05-30 23:19:46.000000000 +0000 @@ -12,10 +12,7 @@ scheduling: dag_name: bqetl_braze date_partition_parameter: null - referenced_tables: - - - moz-fx-data-shared-prod - - subscription_platform_derived - - logical_subscriptions_history_v1 + referenced_tables: [] bigquery: time_partitioning: null range_partitioning: null diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml 2024-05-30 23:19:45.000000000 +0000 +++ 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/braze_derived/users_v1/metadata.yaml 2024-05-30 23:19:46.000000000 +0000 @@ -13,10 +13,7 @@ scheduling: dag_name: bqetl_braze date_partition_parameter: null - referenced_tables: - - - moz-fx-data-shared-prod - - marketing_suppression_list_derived - - main_suppression_list_v1 + referenced_tables: [] bigquery: time_partitioning: null range_partitioning: null ```

Link to full diff