apache / seatunnel

SeaTunnel is a next-generation super high-performance, distributed, massive data integration tool.
https://seatunnel.apache.org/
Apache License 2.0
8.03k stars 1.82k forks source link

waterdrop-hive2ck #589

Closed AndyNiu closed 3 years ago

AndyNiu commented 3 years ago

Waterdrop Version (Waterdrop版本) — a clear version of Waterdrop: 1.4.3-2.11.8

Flink or Spark Version (Flink 或者 Spark 版本) — a clear version of Flink or Spark: 2.3

Java or Scala Version (Java或者Scala版本) — a clear version of Java or Scala: 1.8

Waterdrop Config(Waterdrop配置文件)

######
###### This config file is a demonstration of batch processing in waterdrop config
######

spark {
  spark.sql.catalogImplementation = "hive"
  spark.app.name = "adm_ck_application_arrow_sku"
  spark.executor.instances = 50
  spark.dynamicAllocation.maxExecutors = 20
  spark.dynamicAllocation.minExecutors = 5
  spark.buffer.pageSize = 1m
}

input {
  hive {
    pre_sql =  """
 select 
    mkt_pred_gender as pred_gender,
    mkt_real_gender as real_gender,
    mkt_pred_age as pred_age,
    mkt_real_age as real_age,
    ulp_is_jd_emplyoee,
    ulp_base_profession,
    finan_birth_day,
    finan_pin001001006,
    last_open_province_name,
    last_open_city_name,
    last_order_province_name,
    last_order_city_name,
    last_order_county_name,
    null as areaV2,
    mkt_city_level as city_level,
    ulp_base_marriage,
    search_if_university,
    stu_is_certif_student as is_certif_student,
    stu_education as education,
    stu_school_id as school_id,
    finan_pin004006002003,
    mkt_user_mobile as user_mobile,
    ulp_seni_hascar,
    ad_family_owners,
    ad_pet_owner,
    mkt_user_life_cycle_type,
    treasure_jxz,
    device_user_vitality as user_vitality,
    plus_user_flag,
    plus_enddate,
    plus_zs_enddate,
    yf_bean,
    df_bean,
    is_freight,
    mkt_medal_applicant as medal_applicant,
    mkt_digital_medal as digital_medal,
    mkt_medal_books as medal_books,
    mkt_medal_food as medal_food,
    mkt_clean_medal as clean_medal,
    mkt_medal_infantmom as medal_infantmom,
    mkt_medal_glob as medal_glob,
    mkt_medal_pet as medal_pet,
    mkt_medal_bine as medal_bine,
    mkt_medal_communicate as medal_communicate,
    mkt_medal_perscare as medal_perscare,
    mkt_medal_clothing as medal_clothing,
    mkt_medal_home as medal_home,
    mkt_medal_outdoor as medal_outdoor,
    mkt_medal_beautypro as medal_beautypro,
    mkt_medal_luxury as medal_luxury,
    mkt_medal_clock as medal_clock,
    mkt_medal_medicine as medal_medicine,
    mkt_medal_car as medal_car,
    mkt_medal_flowgar as medal_flowgar,
    mkt_medal_fresh as medal_fresh,
    mkt_is_risk_user as gooduser_score,
    last1y_buy_valid_parent_ord_qtty,
    pg_last180d_tuan_cnt,
    pg_last90d_tuan_cnt,
    pg_last1d_tuan_cnt,
    pg_last30d_tuan_cnt,
    pg_last180d_join_cnt,
    pg_last90d_join_cnt,
    pg_last30d_join_cnt,
    pg_last1d_join_cnt,
    pg_last180d_tuan_suc_cnt,
    pg_last90d_tuan_suc_cnt,
    pg_last30d_tuan_suc_cnt,
    pg_last1d_tuan_suc_cnt,
    mkt_last_app_login_tm as last_app_login_tm,
    null as ord_hour_prefr,
    ulp_sale_client,
    null as browser_hour_prefr,
    cpp_view_terminal,
    null as email_type,
    null as time_range,
    null as send_times,
    null as open_times,
    null as click_times,
    null as open_type,
    null as is_mail_buy,
    null as last_send_tm,
    null as last_open_tm,
    null as last_click_tm,
    ulp_sale_paytype,
    sens_promotion,
    ad_is_impulse_purchase_user as ad_is_impulse_purchase_user,
    ad_coupon_use as ad_coupon_use,
    mkt_cook_prefer as cook_prefer,
    mkt_bake_prefer as bake_prefer,
    mkt_snake_prefer as snake_prefer,
    mkt_coffee_prefer as coffee_prefer,
    mkt_tea_prefer as tea_prefer,
    mkt_wine_prefer as wine_prefer,
    mkt_sport_prefer as sport_prefer,
    mkt_chess_and_card_prefer as chess_and_card_prefer,
    mkt_outdoor_sport_prefer as outdoor_sport_prefer,
    mkt_fitness_prefer as fitness_prefer,
    mkt_game_prefer as game_prefer,
    mkt_movie_prefer as movie_prefer,
    mkt_dance_prefer as dance_prefer,
    mkt_music_prefer as music_prefer,
    mkt_litterateur_prefer as litterateur_prefer,
    mkt_stamp_prefer as stamp_prefer,
    mkt_cartoon_prefer as cartoon_prefer,
    mkt_art_prefer as art_prefer,
    mkt_pet_prefer as pet_prefer,
    mkt_plants_prefer as plants_prefer,
    mkt_healthy_keep_prefer as healthy_keep_prefer,
    mkt_clean_prefer as clean_prefer,
    mkt_travel_prefer as travel_prefer,
    mkt_diy_prefer as diy_prefer,
    mkt_decoration_prefer as decoration_prefer,
    mkt_beauty_prefer as beauty_prefer,
    mkt_luxury_prefer as luxury_prefer,
    mkt_photo_prefer as photo_prefer,
    mkt_technology_prefer as technology_prefer,
    mkt_babychild_product_prefer as babychild_product_prefer,
    ad_focus_students,
    mkt_internet_knowledge_prefer as internet_knowledge_prefer,
    mkt_foreign_language_prefer as foreign_language_prefer,
    mkt_midnight_prefer as midnight_prefer,
    mkt_buddha_like_prefer as buddha_like_prefer,
    mkt_used_product_prefer as used_product_prefer,
    mkt_finance_prefer as finance_prefer,
    search_age_range as search_age_range,
    search_child_num as search_child_num,
    search_child_birth_month as search_child_birth_month,
    search_child_sex as search_child_sex,
    device_brand as device_brand,
    device_model as device_model,
    device_sub_times as sub_times,
    user_log_acct_original as user_log_acct,
    user_active,
    day as DAY,
    '2020-11-25' as dt,
    rank_fanbox,
    rank_yep,
    car_marry_flag as  marry_flag,  
    car_price_range as price_range,    
    car_old_car_user as old_car_user, 
    car_mileage as  mileage,     
    car_age_car as age_car,        
    car_distance  as distance,     
    car_brand_name as  brand_name,  
    car_series_name as  series_name,
    cate_count,
    null as art_cnt,
    business_cnt,
    tempr_is_cryogenic  as is_cryogenic,
    tempr_is_freeze as is_freeze,    
    tempr_is_refrigerate as is_refrigerate,
    tempr_is_temp_control as is_temp_control,
    tempr_is_live as  is_live,   
    tempr_is_fresh as  is_fresh,    
    tempr_is_supportdirect as is_supportdirect,
    tempr_is_nonesupport as is_nonesupport,
    brand_name_pre,
    mkt_user_value   as user_value,  
    mkt_city_tier_level as city_tier_level,
    loyalty_glob,
    cust_purchpower,
    ad_love_seckill_level as love_seckill_level,
    super_user_cate,
    is_stroll_user,
    ip_flag,
    mkt_small_capacity_prefer as  small_capacity_prefer,
    mkt_good_looking_prefer  as  good_looking_prefer,
    mkt_originality_prefer as  originality_prefer,
    mkt_color_prefer as color_prefer,
    mkt_custom_made_prefer as  custom_made_prefer,
    mkt_protable_prefer as  protable_prefer,
    mkt_quality_prefer  as  quality_prefer,
    pin004002004003,
    plus_open_channel,
    last1y_buy_valid_total_amount,
    pet_label,
    pet_type,
    pet_age,
    pet_bodysize,
    dolphin_score,
    dolphin_score_layer,
    phone_price_range,
    pin004002006033,
    pin004002006032,
    finan_fashion_score,
    finan_fashion_dept,
    finan_fashion_cat,
    null as sinkuser_score,
    mkt_business_prefer   as  business_prefer,
    pbc_hexin_flag,
    jxz_uptrend,
    jxz_downtrend,
    jxz_stable_section,
    usual_dim_city_id,
    new_prod_pref_3c,
    new_prod_pref_all,
    giftbox_fre_flag,
    springfestival_gift_target,
    springfestival_gift_top3_cate2,
    springfestival_gift_top3_style,
    springfestival_gift_price_per_unit,
    springfestival_gift_habit_score,
    springfestival_gift_year_latest,
    springfestival_gift_target_latest,
    springfestivall_gift_top3_cate2_latest,
    springfestival_gift_attention,
    springfestival_gift_top3_cate2_attention,
    children_grade,
    life_cycle_consumption_goods,
    local_score,
    value_glob_valuegrp,
    fresh_last_ord_days_seg,
    fresh_reord_cycle,
 `_c993` as dm_ord_precise_jdh,
`_c994` as dm_cart_precise_jdh,
`_c995` as dm_follow_precise_jdh,
`_c996` as dm_browse_precise_jdh,
`_c997` as dm_search_precise_jdh,
`_c998` as dm_comment_precise_jdh,
`_c999` as dm_online_inquiry_jdh,
`_c1000` as dm_ord_valuegrp_jdh,
tvshow_watched,
1y_jdmall_amount_level,
is_get_cash_user,
`_c1001` as perscare_purchase_power,
heal_yaojisong_store_distance,
`_c986` as dm_health_products_prefer_jdh,
`_c985` as dm_instrument_prefer_jdh,
`_c988` as dm_tea_prefer_jdh,
`_c984` as dm_sugarfree_prefer_jdh,
`_c987` as dm_steamer_prefer_jdh,
`_c989` as dm_ord_rfm_group_jdh,
`_c992` as dm_bjp_rfm_group_jdh,
`_c990` as dm_zly_rfm_group_jdh,
`_c991` as dm_tea_rfm_group_jdh,
`_c1003` as stu_type,
`_c1004` as stu_province,
`_c1005` as  stu_city,
`_c1002` as dc_group_type,
`_c1009` as dc_life_cycle_type_6_stage,
`_c1010` as  dc_life_cycle_value_level,
`_c1011` as dc_new_user_type,
`_c1012` as  dc_fiscal_year_type,
`_c1013` as is_first_ord_risk,
`_c1014` as  install_apk_soft,
`_c1015` as  dc_life_cycle_value,
`_c1016` as is_history_only,
split(regexp_replace(concat_ws(',',_c1007),"'",''),',') as market_media_first_cate_pref,
split(regexp_replace(concat_ws(',',_c1008),"'",''),',') as market_media_third_cate_pref,
 split(regexp_replace(concat_ws(',',_c1006),"'",''),',') as market_media_brand_pref,
null as fgd_poiid_around_user,
`_c1022` as shop_fgd_id_around_user,
`_c1020` as school_fgd_id_around_user,
`_c1023` as pred_student_type,
split(regexp_replace(concat_ws(',',_c1019),"'",''),',') as cellphone_change_brand_model,
`_c1017` as cellphone_change_prob,
`_c1018`  as cellphone_change_price_range,
`_c1028` as dim_clv_level,
`_c1026` as operator,
`_c1043` as baby_mother_inquiry_jdh,
`_c1042` as pregnancy_inquiry_jdh,
`_c1041` as pre_pregnancy_inquiry_jdh,
`_c1038` as baby_mother_cart_precise_jdh,
`_c1037` as baby_mother_ord_precise_jdh,
`_c1034` as  pregnancy_cart_precise_jdh,
`_c1033` as pregnancy_ord_precise_jdh,
`_c1030` as pre_pregnancy_cart_precise_jdh,
`_c1029` as pre_pregnancy_ord_precise_jdh,
`_c1049` as baby_mother_prefer_jdh,
`_c1048` as pregnancy_prefer_jdh,
`_c1047` as pre_pregnancy_prefer_jdh,
`_c1046` as baby_mother_ord_valuegrp_jdh,
`_c1045` as pregnancy_ord_valuegrp_jdh,
`_c1044` as pre_pregnancy_ord_valuegrp_jdh,
`_c1040` as baby_mother_browse_precise_jdh,
`_c1036` as  pregnancy_browse_precise_jdh,
`_c1032` as pre_pregnancy_browse_precise_jdh,
`_c1035` as pregnancy_follow_precise_jdh,
`_c1031` as pre_pregnancy_follow_precise_jdh,
`_c1039` as baby_mother_follow_precise_jdh,
`_c1051` as global_first_cate,
`_c1050` as global_first,
mkt_delivery_addr_county as delivery_addr_county,
`_c1053` as user_3c_digital_attention_days,
`_c1055` as  user_3c_digital_order_days,
`_c1054` as user_3c_digital_view_days,
`_c1052` as user_theme_fensi_sx,
`_c1060` as user_brand_id,
`_c1062` as user_brand_channel,
`_c1061` as user_brand_level,
`_c1056` as user_first_cate_has_order,
`_c1057` as user_second_cate_has_order,
`_c1064` as user_child_predict_gender,
`_c1063` as user_child_predict_age_range,
`_c1065` as user_child_real_age_range,
`_c1066` as user_child_real_gender,
plus_user_stage,
`_c1068` as user_3C_purchpower_level,
`_c1069` as user_3C_purchpower_score,
    null as finan_phone_province,
    null as user_third_cate_has_order
 from  app.app_l01_userprofile_bkflow_test_da  where crc32(md5(user_log_acct_original))%19="${mod}"
    """
    table_name = "app_l01_userprofile_bkflow_test_da_test"
  }
}
filter {

}

output {
  clickhouse {
    host = ""${host}""
    clickhouse.socket_timeout = 300000
    database = "label"
    table = "app_l01_userprofile_active_da_new_local1125"
    username = "default"
    password = "default"
    bulk_size = 5000000
  }
}

Running Command(启动命令)

请在这里添加完整的waterdrop启动命令及其参数。

Error Exception — add the exception about the problem here (您可以将程序报错信息粘贴在此处):

ru.yandex.clickhouse.except.ClickHouseException: ClickHouse exception, code: 62, host: 11.3.214.5, port: 8123; Code: 62, e.displayText() = DB::Exception: Syntax error: failed at position 3701: 1y_jdmall_amount_level,is_get_cash_user,perscare_purchase_power,heal_yaojisong_store_distance,dm_health_products_prefer_jdh,dm_instrument_prefer_jdh,dm_tea_pref. Wrong number (version 19.9.5.36)

Screenshots — if applicable, add screenshots to help explain your problem (您可以将相关截图放在此处):

问题:ClickHouse 的表结构里如果有以数字开头的字段名,推数时就会报错,例如上面 input 中的 1y_jdmall_amount_level。

image

RickyHuo commented 3 years ago

此问题在最新版本中已修复。