google-marketing-solutions / ga4_dataform

Apache License 2.0
113 stars 38 forks source link

Unrecognized name: event_date_checkpoint #8

Open LRENZ opened 2 months ago

LRENZ commented 2 months ago

Hi there, I am not sure if my dataform version is wrong but it will raise the following issue when running both non-incremental/incremental query. the checkpoint variables in pre_operations is not defined by somehow

Drop the code here for reference:

config {
  type: "incremental",
  schema: constants.STAGING_DATASET,
  description: "Staging model for GA4 events_* table. Basic column casting and value extractions are performed here",
  bigquery: {
    partitionBy: "event_date",
    clusterBy: ["user_pseudo_id", "ga_session_id"]
  }
}

pre_operations {
  declare event_date_checkpoint default (
    ${when(incremental(),
      `select max(event_date) -3 from ${self()}`,
      `select cast('${constants.START_DATE}' as date format 'yyyymmdd')`)
    }
  );
  declare is_active_user bool default null; --to avoid errors on old tables where this field didn't exist

  declare collected_traffic_source struct<manual_campaign_id array<string>,
                                        manual_campaign_name array<string>,
                                        manual_source array<string>,
                                        manual_medium array<string>,
                                        manual_term array<string>,
                                        manual_content array<string>,
                                        gclid array<string>,
                                        dclid array<string>,
                                        srsltid array<string>> default null; --to avoid errors on old tables where this field didn't exist
  ---
  ${when(incremental(),
    `delete from ${self()} where event_date >= event_date_checkpoint`)
  }
}

with
  source as (
    select
      event_timestamp, --utc
      datetime(timestamp_seconds(cast(event_timestamp / 1000000 as int64)), '${constants.REPORTING_TIME_ZONE}') as event_datetime, --ga4 property reporting time zone
      cast(event_date as date format 'yyyymmdd') as event_date, --event_date is already reported in ga4 property reporting time zone
      lower(replace(trim(event_name), " ", "_")) as event_name,
      event_params,
      event_previous_timestamp,
      event_value_in_usd,
      event_bundle_sequence_id,
      event_server_timestamp_offset,
      user_id,
      user_pseudo_id,
      user_properties,
      user_first_touch_timestamp,
      device,
      geo,
      app_info,
      is_active_user,
      (
      select
        as struct traffic_source.source as source,
        traffic_source.medium as medium,
        traffic_source.name as campaign
      ) as traffic_source,
      (
      select
        as struct collected_traffic_source.manual_campaign_id as manual_campaign_id,
        lower(collected_traffic_source.manual_campaign_name) as manual_campaign_name,
        lower(collected_traffic_source.manual_source) as manual_source,
        lower(collected_traffic_source.manual_medium) as manual_medium,
        lower(collected_traffic_source.manual_content) as manual_content,
        lower(collected_traffic_source.manual_term) as manual_term,
        collected_traffic_source.gclid as gclid,
        collected_traffic_source.dclid as dclid
      ) as collected_traffic_source,
      stream_id,
      platform,
      ecommerce,
      items,
      ${helpers.unnestColumn('event_params', 'ga_session_id', 'int_value')} as ga_session_id,
      ${helpers.unnestColumn('event_params', 'page_location')} as page_location,
      ${helpers.unnestColumn('event_params', 'ga_session_number', 'int_value')} as ga_session_number,
      ${helpers.unnestColumn('event_params', 'engagement_time_msec', 'int_value')} as engagement_time_msec,
      ${helpers.unnestColumn('event_params', 'page_title')} as page_title,
      ${helpers.unnestColumn('event_params', 'page_referrer')} as page_referrer,
      (
      select
        as struct lower(${helpers.unnestColumn('event_params', 'source')}) as source,
        lower(${helpers.unnestColumn('event_params', 'medium')}) as medium,
        lower(${helpers.unnestColumn('event_params', 'campaign')}) as campaign,
        lower(${helpers.unnestColumn('event_params', 'content')}) as content,
        lower(${helpers.unnestColumn('event_params', 'term')}) as term,
        ${helpers.unnestColumn('event_params', 'gclid')} as gclid,
        ${helpers.unnestColumn('event_params', 'dclid')} as dclid
      ) as event_traffic_source,
      coalesce( (${helpers.unnestColumn('event_params', 'session_engaged', 'int_value')}),
                (case
                  when (${helpers.unnestColumn('event_params', 'session_engaged')}) = "1" then 1
                end
          ) ) as session_engaged
    from
      ${ref('events_*')}
    where
      _table_suffix not like "%intraday%"
      and _table_suffix not like "%fresh%"
      and cast(_table_suffix as date format 'yyyymmdd') >= event_date_checkpoint
  )
select
  *
from
  source

Many thanks

piariachi commented 2 months ago

Hi, could you try running the complied query in the BigQuery UI and see if the variable is read there?