hail-is / hail

Cloud-native genomic dataframes and batch computing
https://hail.is
MIT License
966 stars 242 forks source link

NoSuchElementException after drop #9016

Closed konradjk closed 4 years ago

konradjk commented 4 years ago

This happens on current master (fd932b2bff17).

def separate_results_mt_by_pop(mt, col_field='pheno_data', entry_field='summary_stats'):
    """Split each column into one column per element of its ``col_field`` array.

    The array stored in ``mt[col_field]`` is paired with positional indices
    via ``hl.zip_with_index`` and the columns are exploded on that paired
    array, yielding one column per array element. Each resulting column gets
    a ``pop_index`` field (element 0 of the pair) and has ``col_field``
    replaced by element 1 of the pair. Finally ``entry_field`` is replaced
    by its own element at position ``pop_index``.

    Parameters
    ----------
    mt
        Matrix table to reshape (presumably a Hail ``MatrixTable`` whose
        ``col_field`` and ``entry_field`` are parallel arrays -- confirm
        against the caller).
    col_field : str
        Name of the array-valued column field to explode.
    entry_field : str
        Name of the array-valued entry field to index by ``pop_index``.

    Returns
    -------
    The reshaped matrix table with the extra ``pop_index`` column field.
    """
    # Pair every element of the column array with its position, then fan the
    # columns out so that each (index, element) pair gets a column of its own.
    indexed = hl.zip_with_index(mt[col_field])
    exploded = mt.annotate_cols(col_array=indexed).explode_cols('col_array')

    # Unpack the pair: the index becomes `pop_index`, the element overwrites
    # `col_field`; transmute drops the temporary `col_array` field.
    pair = exploded.col_array
    per_pop = exploded.transmute_cols(pop_index=pair[0], **{col_field: pair[1]})

    # Keep only the entry-array element that belongs to this column's index.
    selected = per_pop[entry_field][per_pop.pop_index]
    return per_pop.annotate_entries(**{entry_field: selected})

mt.cols().show()
2020-06-24 15:44:46 Hail: INFO: Coerced sorted dataset
+---------------+-----------+--------------+----------+----------+---------------------------------+-----------+
| trait_type    | phenocode | pheno_sex    | coding   | modifier | pop                             | pop_index |
+---------------+-----------+--------------+----------+----------+---------------------------------+-----------+
| str           | str       | str          | str      | str      | array<str>                      |     int32 |
+---------------+-----------+--------------+----------+----------+---------------------------------+-----------+
| "biomarkers"  | "30820"   | "both_sexes" | ""       | "irnt"   | ["AFR","AMR","CSA","EUR","MID"] |         0 |
| "categorical" | "100260"  | "both_sexes" | "100260" | ""       | ["AFR","AMR","CSA","EAS","EUR"] |         0 |
| "categorical" | "102130"  | "both_sexes" | "102130" | ""       | ["AFR","CSA","EAS","EUR","MID"] |         0 |
| "categorical" | "102930"  | "both_sexes" | "102930" | ""       | ["AFR","AMR","CSA","EAS","EUR"] |         0 |
| "categorical" | "103140"  | "both_sexes" | "103140" | ""       | ["AFR","CSA","EAS","EUR","MID"] |         0 |
| "categorical" | "1150"    | "both_sexes" | "3"      | ""       | ["AMR","CSA","EAS","EUR","MID"] |         0 |
| "categorical" | "1448"    | "both_sexes" | "4"      | ""       | ["AFR","CSA","EAS","EUR","MID"] |         0 |
| "categorical" | "1508"    | "both_sexes" | "4"      | ""       | ["AFR","CSA","EAS","EUR","MID"] |         0 |
| "categorical" | "1647"    | "both_sexes" | "6"      | ""       | ["AFR","AMR","CSA","EAS","EUR"] |         0 |
| "categorical" | "1835"    | "both_sexes" | "1835"   | ""       | ["AFR","AMR","EAS","EUR","MID"] |         0 |
+---------------+-----------+--------------+----------+----------+---------------------------------+-----------+
showing top 10 rows
mt.drop('pop_index').cols().show()
2020-06-24 15:52:20 Hail: WARN: cols(): Resulting column table is sorted by 'col_key'.
    To preserve matrix table column order, first unkey columns with 'key_cols_by()'
Traceback (most recent call last):
  File "/opt/local/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/formatters.py", line 224, in catch_format_error
    r = method(self, *args, **kwargs)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/formatters.py", line 702, in __call__
    printer.pretty(obj)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/lib/pretty.py", line 394, in pretty
    return _repr_pprint(obj, self, cycle)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/lib/pretty.py", line 700, in _repr_pprint
    output = repr(obj)
  File "/Users/konradk/hail/hail/python/hail/table.py", line 1269, in __repr__
    return self.__str__()
  File "/Users/konradk/hail/hail/python/hail/table.py", line 1266, in __str__
    return self._ascii_str()
  File "/Users/konradk/hail/hail/python/hail/table.py", line 1292, in _ascii_str
    rows, has_more, dtype = self.data()
  File "/Users/konradk/hail/hail/python/hail/table.py", line 1276, in data
    rows, has_more = t._take_n(self.n)
  File "/Users/konradk/hail/hail/python/hail/table.py", line 1423, in _take_n
    rows = self.take(n + 1)
  File "<decorator-gen-1095>", line 2, in take
  File "/Users/konradk/hail/hail/python/hail/typecheck/check.py", line 614, in wrapper
    return __original_func(*args_, **kwargs_)
  File "/Users/konradk/hail/hail/python/hail/table.py", line 2087, in take
    return self.head(n).collect(_localize)
  File "<decorator-gen-1089>", line 2, in collect
  File "/Users/konradk/hail/hail/python/hail/typecheck/check.py", line 614, in wrapper
    return __original_func(*args_, **kwargs_)
  File "/Users/konradk/hail/hail/python/hail/table.py", line 1886, in collect
    return Env.backend().execute(e._ir)
  File "/Users/konradk/hail/hail/python/hail/backend/spark_backend.py", line 296, in execute
    result = json.loads(self._jhc.backend().executeJSON(jir))
  File "/Users/konradk/programs/spark-2.4.1-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/Users/konradk/hail/hail/python/hail/backend/spark_backend.py", line 41, in deco
    'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
hail.utils.java.FatalError: NoSuchElementException: key not found: 1
[...]
java.util.NoSuchElementException: key not found: 1
    at scala.collection.MapLike$class.default(MapLike.scala:228)
    at scala.collection.AbstractMap.default(Map.scala:59)
    at scala.collection.MapLike$class.apply(MapLike.scala:141)
    at scala.collection.AbstractMap.apply(Map.scala:59)
    at is.hail.types.encoded.EBaseStruct.fieldType(EBaseStruct.scala:34)
    at is.hail.types.encoded.EBaseStruct$$anonfun$8.apply(EBaseStruct.scala:84)
    at is.hail.types.encoded.EBaseStruct$$anonfun$8.apply(EBaseStruct.scala:83)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at is.hail.types.encoded.EBaseStruct._decodedPType(EBaseStruct.scala:83)
    at is.hail.types.encoded.EType.decodedPType(EType.scala:159)
    at is.hail.types.encoded.EBaseStruct$$anonfun$7.apply(EBaseStruct.scala:78)
    at is.hail.types.encoded.EBaseStruct$$anonfun$7.apply(EBaseStruct.scala:77)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
    at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at is.hail.types.encoded.EBaseStruct._decodedPType(EBaseStruct.scala:77)
    at is.hail.types.encoded.EType.decodedPType(EType.scala:159)
    at is.hail.types.encoded.EType$.buildDecoder(EType.scala:255)
    at is.hail.types.encoded.EType.buildDecoder(EType.scala:36)
    at is.hail.io.TypedCodecSpec.buildDecoder(TypedCodecSpec.scala:41)
    at is.hail.expr.ir.TableParallelize.execute(TableIR.scala:784)
    at is.hail.expr.ir.TableKeyBy.execute(TableIR.scala:835)
    at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1432)
    at is.hail.expr.ir.TableOrderBy.execute(TableIR.scala:2214)
    at is.hail.expr.ir.TableSubset$class.execute(TableIR.scala:950)
    at is.hail.expr.ir.TableHead.execute(TableIR.scala:958)
    at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1432)
johnc1231 commented 4 years ago

Random came up @catoverdrive