scylladb / scylla-migrator

Migrates data to Scylla using Spark, typically from Cassandra or Parquet files; alternatively, from DynamoDB to Scylla Alternator.
https://migrator.docs.scylladb.com/stable/
Apache License 2.0

Many types in one column, migrator not working #51

Open · phenriqueabr opened this issue 3 years ago

phenriqueabr commented 3 years ago

I'm trying to use scylla-migrator with the table below and it gives me an error. If I use a table without a column like blocks, it runs normally.

CREATE TABLE prom.teste (
    store text PRIMARY KEY,
    blocks list<frozen<tuple<text, frozen<list<frozen<tuple<text, text, text, frozen<set<text>>, bigint, boolean, frozen<map<text, text>>, boolean>>>>>>>
) WITH bloom_filter_fp_chance = 0.01
    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
    AND comment = ''
    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
    AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
    AND crc_check_chance = 1.0
    AND dclocal_read_repair_chance = 0.1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.0
    AND speculative_retry = '99PERCENTILE';
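
For illustration, I suspect the failure is triggered by rows where one of the nested tuple components is null. A hypothetical insert (made-up values, not my real data) that would produce such a row:

INSERT INTO prom.teste (store, blocks) VALUES (
    'store-1',
    [('block-1', [('chunk-1', 'meta', null, {'label-a'}, 1620000000, true, {'k': 'v'}, false)])]
);

The NullPointerException below is raised on the write side (saveToCassandra -> TableWriter -> BoundStatementBuilder), inside the connector's tuple value converter:
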
21/05/03 18:01:29 WARN TaskSetManager: Lost task 8.528 in stage 0.0 (TID 2066, 172.16.66.235, executor 0): java.lang.NullPointerException
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter$$anonfun$convertPF$2$$anonfun$applyOrElse$2.apply(TupleType.scala:119)
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter$$anonfun$convertPF$2$$anonfun$applyOrElse$2.apply(TupleType.scala:116)
        at scala.collection.immutable.Range.foreach(Range.scala:160)
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter$$anonfun$convertPF$2.applyOrElse(TupleType.scala:116)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter.convert(TupleType.scala:105)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter$$anonfun$convertPF$38.applyOrElse(TypeConverter.scala:902)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.com$datastax$spark$connector$types$NullableTypeConverter$$super$convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.types.NullableTypeConverter$class.convert(TypeConverter.scala:57)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter$$anonfun$com$datastax$spark$connector$types$TypeConverter$CollectionConverter$$newCollection$1.apply(TypeConverter.scala:694)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter$$anonfun$com$datastax$spark$connector$types$TypeConverter$CollectionConverter$$newCollection$1.apply(TypeConverter.scala:693)
        at scala.collection.immutable.List.foreach(List.scala:392)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter.com$datastax$spark$connector$types$TypeConverter$CollectionConverter$$newCollection(TypeConverter.scala:693)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter$$anonfun$convertPF$37.applyOrElse(TypeConverter.scala:703)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter.convert(TypeConverter.scala:686)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter$$anonfun$convertPF$38.applyOrElse(TypeConverter.scala:902)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.com$datastax$spark$connector$types$NullableTypeConverter$$super$convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.types.NullableTypeConverter$class.convert(TypeConverter.scala:57)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter$$anonfun$convertPF$2$$anonfun$applyOrElse$2.apply(TupleType.scala:118)
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter$$anonfun$convertPF$2$$anonfun$applyOrElse$2.apply(TupleType.scala:116)
        at scala.collection.immutable.Range.foreach(Range.scala:160)
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter$$anonfun$convertPF$2.applyOrElse(TupleType.scala:116)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TupleType$DriverTupleValueConverter.convert(TupleType.scala:105)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter$$anonfun$convertPF$38.applyOrElse(TypeConverter.scala:902)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.com$datastax$spark$connector$types$NullableTypeConverter$$super$convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.types.NullableTypeConverter$class.convert(TypeConverter.scala:57)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter$$anonfun$com$datastax$spark$connector$types$TypeConverter$CollectionConverter$$newCollection$1.apply(TypeConverter.scala:694)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter$$anonfun$com$datastax$spark$connector$types$TypeConverter$CollectionConverter$$newCollection$1.apply(TypeConverter.scala:693)
        at scala.collection.immutable.List.foreach(List.scala:392)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter.com$datastax$spark$connector$types$TypeConverter$CollectionConverter$$newCollection(TypeConverter.scala:693)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter$$anonfun$convertPF$37.applyOrElse(TypeConverter.scala:703)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TypeConverter$CollectionConverter.convert(TypeConverter.scala:686)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter$$anonfun$convertPF$38.applyOrElse(TypeConverter.scala:902)
        at com.datastax.spark.connector.types.TypeConverter$class.convert(TypeConverter.scala:44)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.com$datastax$spark$connector$types$NullableTypeConverter$$super$convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.types.NullableTypeConverter$class.convert(TypeConverter.scala:57)
        at com.datastax.spark.connector.types.TypeConverter$OptionToNullConverter.convert(TypeConverter.scala:885)
        at com.datastax.spark.connector.writer.BoundStatementBuilder$$anonfun$bind$1.apply$mcVI$sp(BoundStatementBuilder.scala:108)
        at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
        at com.datastax.spark.connector.writer.BoundStatementBuilder.bind(BoundStatementBuilder.scala:104)
        at com.datastax.spark.connector.writer.GroupingBatchBuilder.next(GroupingBatchBuilder.scala:105)
        at com.datastax.spark.connector.writer.GroupingBatchBuilder.next(GroupingBatchBuilder.scala:30)
        at scala.collection.Iterator$class.foreach(Iterator.scala:891)
        at com.datastax.spark.connector.writer.GroupingBatchBuilder.foreach(GroupingBatchBuilder.scala:30)
        at com.datastax.spark.connector.writer.TableWriter$$anonfun$writeInternal$2.apply(TableWriter.scala:241)
        at com.datastax.spark.connector.writer.TableWriter$$anonfun$writeInternal$2.apply(TableWriter.scala:210)
        at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withSessionDo$1.apply(CassandraConnector.scala:112)
        at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withSessionDo$1.apply(CassandraConnector.scala:111)
        at com.datastax.spark.connector.cql.CassandraConnector.closeResourceAfterUse(CassandraConnector.scala:129)
        at com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraConnector.scala:111)
        at com.datastax.spark.connector.writer.TableWriter.writeInternal(TableWriter.scala:210)
        at com.datastax.spark.connector.writer.TableWriter.insert(TableWriter.scala:188)
        at com.datastax.spark.connector.writer.TableWriter.write(TableWriter.scala:175)
        at com.datastax.spark.connector.RDDFunctions$$anonfun$saveToCassandra$1.apply(RDDFunctions.scala:38)
        at com.datastax.spark.connector.RDDFunctions$$anonfun$saveToCassandra$1.apply(RDDFunctions.scala:38)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
        at org.apache.spark.scheduler.Task.run(Task.scala:123)
        at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
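
In case it helps with triage, here is a minimal diagnostic sketch (my own code, not part of scylla-migrator; the object name FindNullTupleComponents and the helper containsNull are hypothetical) that uses the same Spark Cassandra Connector read path to print the partition keys of rows whose blocks column contains a null nested-tuple component:

import com.datastax.spark.connector._
import org.apache.spark.{SparkConf, SparkContext}

object FindNullTupleComponents {

  // Recursively search a value read by the connector for null components
  // inside tuples, lists, sets and maps. (Helper name is mine.)
  def containsNull(value: Any): Boolean = value match {
    case null            => true
    case t: TupleValue   => t.columnValues.exists(containsNull)
    case m: Map[_, _]    => m.exists { case (k, v) => containsNull(k) || containsNull(v) }
    case xs: Iterable[_] => xs.exists(containsNull)
    case _               => false
  }

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("find-null-tuple-components"))
    sc.cassandraTable("prom", "teste")                   // same connector read path as the migrator
      .filter(row => containsNull(row.getRaw("blocks"))) // raw connector value of the blocks column
      .map(_.getString("store"))                         // partition key of each offending row
      .collect()
      .foreach(println)
    sc.stop()
  }
}

If that prints anything, cleaning up (or defaulting) the null components in the source before running the migrator might be a workaround until the converter handles nulls inside tuples.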