rcongiu / Hive-JSON-Serde

Read - Write JSON SerDe for Apache Hive.
Other
733 stars 391 forks source link

Error when using a large schema #212

Open buronahodok opened 5 years ago

buronahodok commented 5 years ago

Hello, developers! I am using Cloudera Hadoop 5.13.1 with Hive 0.14. I added json-serde-cdh5-shim-1.3.9-SNAPSHOT.jar to the Hadoop cluster. I use the following schema for our JSON files:

Click to expand ``` CREATE TABLE TEST.T_TXN_JSON_SERDE ( shift struct < shift:struct < array:struct < begin_time:struct < datetime:string > ,keycode:string ,cash_number:int ,hostname:string ,kkm_model_name:string ,kkm_model_version:string ,eklz_act_date:struct < datetime:string > ,free_kkm_fm:int ,end_time:struct < datetime:string > > > ,kkm_serial:string ,shift_number:string > ,operation struct < operation:struct < array:struct < module_version:string ,point_code:string ,optype:string ,wholesale_number:string ,wholesale_contr:string ,wholesale_opguid:struct < guid:string > ,im_order:struct < array:struct < number:string ,packets:struct < table:struct < rows:array < struct < code:string ,price:double ,quant:double > > > > ,packets_count:int > > ,create_time:struct < datetime:string > ,save_time:struct < datetime:string > ,readonly:boolean ,deleted:boolean ,history:struct < table:struct < rows:array < struct < action:string ,act_time:struct < datetime:string > ,keycode:string > > > > ,assistant:string ,content:struct < table:struct < rows:array < struct < code:string ,quant:double ,price:double ,nds_rate:struct < guid:string > ,nds_sum:double ,save_time:struct < datetime:string > ,round:int ,excise_mark:array > > > > ,hist_content:struct < table:struct < rows:array < struct < action:string ,act_time:struct < datetime:string > ,code:string ,quant:double ,old_quant:double ,price:double ,keycode:string > > > > ,cashless:struct < table:struct < rows:array < struct < pay_system:string ,paysys_type:string ,op_num:string ,pay_time:struct < datetime:string > ,term_num:string ,point_num:string ,auth_code:string ,card_app:string ,card_num:string ,opsum:double ,slip:struct < array:struct < check_text1:string ,check_text2:string > > ,nominal:double > > > > ,card:struct < table:struct < rows:array < struct < code:string ,card_num:string ,price:double > > > > ,present:struct < table:struct < rows:array < struct < code:string ,quant:int ,price:double > > > > ,coupons:struct < 
table:struct < rows:array < struct < code:string ,save_time:struct < datetime:string > > > > > ,correct_paysystem:boolean ,cashsum:double ,round_sum:double ,discount:double ,discount_actions:struct < table:struct < rows:array < struct < id:struct < guid:string > ,format:string ,count:int ,sum:double ,arts:struct < table:struct < rows:array < struct < id:struct < guid:string > ,code:string ,price:double ,discount:double ,quantity:double ,parent_action_id:struct < guid:string > ,parent_id:struct < guid:string > > > > > ,receipt_text:string > > > > ,stickers_count:int ,sticker_actions:struct < table:struct < rows:array < struct < id:struct < guid:string > ,format:string ,count:int ,receipt_text:string > > > > ,information_action:struct < table:struct < rows:array < struct < id:struct < guid:string > ,format:string ,count:int ,receipt_text:string > > > > ,coupons_count:int ,coupons_actions:struct < table:struct < rows:array < struct < id:struct < guid:string > ,format:string ,short_code:string ,count:int ,receipt_text:string > > > > ,smch_actions:struct < table:struct < rows:array < struct < id:string ,sum:double ,arts:struct < table:struct < rows:array < struct < code:string ,price:double ,discount:double ,quantity:double > > > > > > > > ,approved_misc_params:struct < array:struct < buyer_age:boolean ,stickers_count:int ,buyer_phone_number:string > > ,discounted_unique_coupons:struct < table:struct < rows:array < struct < barcode:string ,action_id:struct < table:struct < rows:array < struct < action_id:struct < guid:string > > > > > ,result:boolean ,error:boolean ,message:boolean ,actions:struct < table:struct < rows:array < struct < action_id:struct < guid:string > > > > > ,save_time:struct < datetime:string > > > > ,generated_unique_coupons:struct < table:struct < rows:array < struct < action_id:struct < guid:string > ,receipt_text:string ,barcodes:struct < table:struct < rows:array < struct < barcode:string ,verif_barcode:string > > > > > > > > 
,buyer_souvenirs:struct < table:struct < rows:array < struct < code:string ,quantity:int > > > > ,loyalty_cards:struct < table:struct < rows:array < struct < code:string ,barcode:string ,card_num:string ,create_time:struct < datetime:string > > > > > ,produced_loyalty_cards:struct < table:struct < rows:array < struct < barcode:string ,card_num:string ,action_count:int ,create_time:struct < datetime:string > > > > > ,egais_slips:struct < array:struct < alco_slip:struct < array:struct < url:string ,sign:string ,sign_time:struct < datetime:string > ,number:int ,xml_check:string ,arts:struct < table:struct < rows:array < struct < code:string > > > > > > > > ,check_params:struct < array:struct < check_type:int ,use_journal:int ,lines_count:int > > ,number:string ,sale_number:string ,sale_id:struct < guid:string > ,fiscal_number:string ,fiscal_sign:string ,cashsum:double ,retsum:double ,cashless_sum:double ,cashless_ret:double ,creditsum:double ,creditsum_ret:double ,certsum:double ,certsum_ret:double ,cash_serial:string ,zrep_num:string ,cash_sumrest:double ,consign_sum:double ,payout_sum:double ,certcash_sum:double ,wholesale_sum:double ,hist_barcodes:struct < table:struct < rows:array < struct < code:string ,barcode:string ,quant:double > > > > ,barcodes:struct < table:struct < rows:array < struct < code:string ,barcode:string ,quant:double > > > > > ,egais_slips_error:struct < array:struct < alco_slip:struct < array:struct < shift_number:int ,direct_slip: boolean ,egais_doc_number:int ,arts:struct < table:struct < rows:array < struct < code:string ,quant:double ,ubc:string > > > > > > > > ,multipack:struct < table:struct < rows:array < struct < code:string ,arts:struct < table:struct < rows:array < struct < code:string ,price:double ,excise_mark:array > > > > > > > > ,markdown:struct < table:struct < rows:array < struct < code:string ,quant:double ,coupon_barcode:string ,coupon_day:string ,art_barcode:string ,coupon_num:string ,expiration_date:struct < date:string > 
,markdown_sum:double ,action_id:struct < guid:string > ,begindate:struct < date:string > ,enddate:struct < date:string > ,version:string ,id_shtask:string ,draw_num:string ,shtask_guid:struct < guid:string > ,signal_guid:struct < guid:string > ,signal_discount:double ,signal_price:double ,shtask_expiration_date:struct < date:string > ,markdown_action_id:struct < guid:string > ,gs1_action_id:struct < guid:string > > > > > ,fdo_pos:struct < table:struct < rows:array < struct < code:string ,quant:double ,discount:double ,nds_percent:double ,nds_rate:struct < guid:string > ,action_id:struct < guid:string > ,nds_sum:double > > > > ,cashbox_type:int ,nds_inc:struct < table:struct < rows:array < struct < nds_rate:struct < guid:string > ,nds_sum:double > > > > ,nds_ret:struct < table:struct < rows:array < struct < nds_rate:struct < guid:string > ,nds_sum:double > > > > ,fn_document_tlv:string > > ,id:string > ) ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' ```

The table was created successfully, but a SELECT from the empty table causes an error:

Click to expand ``` Error: Error while compiling statement: FAILED: IllegalArgumentException Error: : expected at the end of 'struct,keycode:string,cash_number:int,hostname:string,kkm_model_name:string,kkm_model_version:string,eklz_act_date:struct,free_kkm_fm:int,end_time:struct>>,kkm_serial:string,shift_number:string>:struct,im_order:struct>>>,packets_count:int>>,create_time:struct,save_time:struct,readonly:boolean,deleted:boolean,history:struct,keycode:string>>>>,assistant:string,content:struct,nds_sum:double,save_time:struct,round:int,excise_mark:array>>>>,hist_content:struct,code:string,quant:double,old_quant:double,price:double,keycode:string>>>>,cashless:struct,term_num:string,point_num:string,auth_code:string,card_app:string,card_num:string,opsum:double,slip:struct>,nominal:double>>>>,card:struct>>>,present:struct>>>,coupons:struct>>>>,correct_paysystem:boolean,cashsum:double,round_sum:double,discount:double,discount_actions:struct,format:string,count:int,sum:double,arts:struct,code:string,price:double,discount:double,quantity:double,parent_action_id:struct,parent_id:struct>>>>,receipt_text:string>>>>,stickers_count:int,sticker_actions:struct,format:string,count:int,receipt_text:string>>>>,information_action:struct,format:string,count:int,receipt_text:string>>>>,coupons_count:int,coupons_actions:struct,format:string,short_code:string,count:int,receipt_text:string>>>>,smch_actions:struct>>>>>>>,approved_misc_params:struct>,discounted_unique_coupons:struct>>>>,result:boolean,error:boolean,message:boolean,actions:struct>>>>,save_time:struct>>>,generated_unique_coupons:struct,receipt_text:string,barcodes:struct>>>>>>>,buyer_souvenirs:struct>>>,loyalty_cards:struct>>>>,produced_loyalty_cards:struct>>>>,egais_slips:struct,number:int,xml_check:string,arts:struct>>>>>>>,check_params:struct>,number:string,sale_number:string,sale_id:struct,fiscal_number:string,fiscal_sign:string,cashsum:double,retsum:double,cashless_sum:double,cashless_ret:double,creditsum' SQLState: 
42000 ErrorCode: 40000 ```

If I remove several parts of the schema, the SELECT works without errors.

How can I avoid this problem?

rcongiu commented 4 years ago

The issue may be that some of your data does not actually conform to the schema.