Hi there, I am not sure if my dataform version is wrong but it will raise the following issue when running both non-incremental/incremental query. the checkpoint variables in pre_operations is not defined by somehow
Drop the code here for reference:
config {
type: "incremental",
schema: constants.STAGING_DATASET,
description: "Staging model for GA4 events_* table. Basic column casting and value extractions are performed here",
bigquery: {
partitionBy: "event_date",
clusterBy: ["user_pseudo_id", "ga_session_id"]
}
}
pre_operations {
declare event_date_checkpoint default (
${when(incremental(),
`select max(event_date) -3 from ${self()}`,
`select cast('${constants.START_DATE}' as date format 'yyyymmdd')`)
}
);
declare is_active_user bool default null; --to avoid errors on old tables where this field didn't exist
declare collected_traffic_source struct<manual_campaign_id array<string>,
manual_campaign_name array<string>,
manual_source array<string>,
manual_medium array<string>,
manual_term array<string>,
manual_content array<string>,
gclid array<string>,
dclid array<string>,
srsltid array<string>> default null; --to avoid errors on old tables where this field didn't exist
---
${when(incremental(),
`delete from ${self()} where event_date >= event_date_checkpoint`)
}
}
with
source as (
select
event_timestamp, --utc
datetime(timestamp_seconds(cast(event_timestamp / 1000000 as int64)), '${constants.REPORTING_TIME_ZONE}') as event_datetime, --ga4 property reporting time zone
cast(event_date as date format 'yyyymmdd') as event_date, --event_date is already reported in ga4 property reporting time zone
lower(replace(trim(event_name), " ", "_")) as event_name,
event_params,
event_previous_timestamp,
event_value_in_usd,
event_bundle_sequence_id,
event_server_timestamp_offset,
user_id,
user_pseudo_id,
user_properties,
user_first_touch_timestamp,
device,
geo,
app_info,
is_active_user,
(
select
as struct traffic_source.source as source,
traffic_source.medium as medium,
traffic_source.name as campaign
) as traffic_source,
(
select
as struct collected_traffic_source.manual_campaign_id as manual_campaign_id,
lower(collected_traffic_source.manual_campaign_name) as manual_campaign_name,
lower(collected_traffic_source.manual_source) as manual_source,
lower(collected_traffic_source.manual_medium) as manual_medium,
lower(collected_traffic_source.manual_content) as manual_content,
lower(collected_traffic_source.manual_term) as manual_term,
collected_traffic_source.gclid as gclid,
collected_traffic_source.dclid as dclid
) as collected_traffic_source,
stream_id,
platform,
ecommerce,
items,
${helpers.unnestColumn('event_params', 'ga_session_id', 'int_value')} as ga_session_id,
${helpers.unnestColumn('event_params', 'page_location')} as page_location,
${helpers.unnestColumn('event_params', 'ga_session_number', 'int_value')} as ga_session_number,
${helpers.unnestColumn('event_params', 'engagement_time_msec', 'int_value')} as engagement_time_msec,
${helpers.unnestColumn('event_params', 'page_title')} as page_title,
${helpers.unnestColumn('event_params', 'page_referrer')} as page_referrer,
(
select
as struct lower(${helpers.unnestColumn('event_params', 'source')}) as source,
lower(${helpers.unnestColumn('event_params', 'medium')}) as medium,
lower(${helpers.unnestColumn('event_params', 'campaign')}) as campaign,
lower(${helpers.unnestColumn('event_params', 'content')}) as content,
lower(${helpers.unnestColumn('event_params', 'term')}) as term,
${helpers.unnestColumn('event_params', 'gclid')} as gclid,
${helpers.unnestColumn('event_params', 'dclid')} as dclid
) as event_traffic_source,
coalesce( (${helpers.unnestColumn('event_params', 'session_engaged', 'int_value')}),
(case
when (${helpers.unnestColumn('event_params', 'session_engaged')}) = "1" then 1
end
) ) as session_engaged
from
${ref('events_*')}
where
_table_suffix not like "%intraday%"
and _table_suffix not like "%fresh%"
and cast(_table_suffix as date format 'yyyymmdd') >= event_date_checkpoint
)
select
*
from
source
Hi there, I am not sure if my dataform version is wrong but it will raise the following issue when running both non-incremental/incremental query. the checkpoint variables in pre_operations is not defined by somehow
Drop the code here for reference:
Many thanks