apache / iceberg-python

Apache PyIceberg
https://py.iceberg.apache.org/
Apache License 2.0
309 stars 114 forks source link

Crash when writing map type with unsigned types #837

Open vtk9 opened 1 week ago

vtk9 commented 1 week ago

Apache Iceberg version

0.6.0 (latest release)

Please describe the bug 🐞

There might be other combinations that don't work

Note: works with (when replaced with line in reproducer)

Note: also crashes with (when replaced with line in reproducer)

The crash occurs with both pyarrow 15 and pyarrow 16, although pyarrow 16 prints less output (only the failed check, without the backtrace).

from pyiceberg.catalog.sql import SqlCatalog
import pyarrow as pa

# Minimal reproducer: build a one-row Arrow table whose only column is a map
# nested inside another map (int8 keys and values), then append it to a
# freshly created Iceberg table.
pylist = [{'nested_map': {5: {5: 5}}}]
arrow_schema = pa.schema(
    [
        pa.field('nested_map', pa.map_(pa.int8(), pa.map_(pa.int8(), pa.int8()))),
    ],
)
arrow_table = pa.Table.from_pylist(pylist, schema=arrow_schema)

# Catalog backed by the SQLite database file `pyiceberg.db` (relative path).
catalog = SqlCatalog(
    'test_catalog',
    **{
        # Fixed: the value was "sql'" (stray apostrophe inside the string).
        # NOTE(review): SqlCatalog is instantiated directly here, so this
        # property is presumably not used for backend selection -- confirm.
        'type': 'sql',
        'uri': 'sqlite:///pyiceberg.db',
    },
)

namespace = 'test_ns'
table_name = 'test_table'

# The Iceberg table schema is taken directly from the Arrow schema above.
catalog.create_namespace(namespace=namespace)
new_table = catalog.create_table(
    identifier=f'{namespace}.{table_name}',
    schema=arrow_schema,
    location='.',
)

# Per the report, the process aborts with "Map array keys array should have no
# nulls"; the backtrace shows the failure inside a pyarrow Table.cast call,
# presumably triggered by this append.
new_table.append(arrow_table)

pyarrow 15.0.2

/Users/voltrondata/github-actions-runner/_work/crossbow/crossbow/arrow/cpp/src/arrow/array/array_nested.cc:848:  Check failed: _s.ok() Operation failed: ValidateChildData(data->child_data)
Bad status: Invalid: Map array keys array should have no nulls
0   libarrow.1500.dylib                 0x0000000128c3bfb8 _ZN5arrow4util7CerrLog14PrintBackTraceEv + 44
1   libarrow.1500.dylib                 0x0000000128c3bf6c _ZN5arrow4util7CerrLogD2Ev + 184
2   libarrow.1500.dylib                 0x0000000128c3beac _ZN5arrow4util7CerrLogD0Ev + 12
3   libarrow.1500.dylib                 0x0000000128c36608 _ZN5arrow4util8ArrowLogD1Ev + 48
4   libarrow.1500.dylib                 0x0000000128e6d8d4 _ZN5arrow8MapArray7SetDataERKNSt3__110shared_ptrINS_9ArrayDataEEE + 508
5   libarrow.1500.dylib                 0x0000000128e6da54 _ZN5arrow8MapArrayC1ERKNSt3__110shared_ptrINS_9ArrayDataEEE + 64
6   libarrow.1500.dylib                 0x0000000128d5bf40 _ZN5arrow9MakeArrayERKNSt3__110shared_ptrINS_9ArrayDataEEE + 1180
7   libarrow.1500.dylib                 0x0000000128b4d898 _ZN5arrow7compute6detail12_GLOBAL__N_114ToChunkedArrayERKNSt3__16vectorINS_5DatumENS3_9allocatorIS5_EEEERKNS_10TypeHolderE + 156
8   libarrow.1500.dylib                 0x0000000128b4cc50 _ZN5arrow7compute6detail12_GLOBAL__N_114ScalarExecutor11WrapResultsERKNSt3__16vectorINS_5DatumENS4_9allocatorIS6_EEEESB_ + 104
9   libarrow.1500.dylib                 0x0000000128b59184 _ZN5arrow7compute6detail20FunctionExecutorImpl7ExecuteERKNSt3__16vectorINS_5DatumENS3_9allocatorIS5_EEEEx + 2156
10  libarrow.1500.dylib                 0x0000000128ae9598 _ZN5arrow7compute12_GLOBAL__N_115ExecuteInternalERKNS0_8FunctionENSt3__16vectorINS_5DatumENS5_9allocatorIS7_EEEExPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 480
11  libarrow.1500.dylib                 0x0000000128ae9320 _ZNK5arrow7compute8Function7ExecuteERKNSt3__16vectorINS_5DatumENS2_9allocatorIS4_EEEEPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 76
12  libarrow.1500.dylib                 0x0000000128b4a518 _ZNK5arrow7compute8internal12_GLOBAL__N_116CastMetaFunction11ExecuteImplERKNSt3__16vectorINS_5DatumENS4_9allocatorIS6_EEEEPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 640
13  libarrow.1500.dylib                 0x0000000128aea758 _ZNK5arrow7compute12MetaFunction7ExecuteERKNSt3__16vectorINS_5DatumENS2_9allocatorIS4_EEEEPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 236
14  _compute.cpython-39-darwin.so       0x0000000119048b38 _ZL43__pyx_pw_7pyarrow_8_compute_8Function_7callP7_objectPKS0_lS0_ + 1608
15  libpython3.9.dylib                  0x00000001027d1e30 method_vectorcall + 176
16  libpython3.9.dylib                  0x00000001027cf78c PyVectorcall_Call + 160
17  _compute.cpython-39-darwin.so       0x0000000119074fe0 _ZL43__pyx_pw_7pyarrow_8_compute_11call_functionP7_objectPKS0_lS0_ + 1744
18  libpython3.9.dylib                  0x00000001028a4ab4 call_function + 440
19  libpython3.9.dylib                  0x00000001028a1c28 _PyEval_EvalFrameDefault + 22592
20  libpython3.9.dylib                  0x00000001028a58a8 _PyEval_EvalCode + 2680
21  libpython3.9.dylib                  0x00000001027cfb04 _PyFunction_Vectorcall + 236
22  libpython3.9.dylib                  0x00000001027cf78c PyVectorcall_Call + 160
23  lib.cpython-39-darwin.so            0x000000010660e440 _ZL44__pyx_pw_7pyarrow_3lib_12ChunkedArray_48castP7_objectPKS0_lS0_ + 1376
24  libpython3.9.dylib                  0x00000001027d1e30 method_vectorcall + 176
25  libpython3.9.dylib                  0x00000001027cf78c PyVectorcall_Call + 160
26  lib.cpython-39-darwin.so            0x00000001066242d8 _ZL36__pyx_pw_7pyarrow_3lib_5Table_25castP7_objectPKS0_lS0_ + 2920
27  libpython3.9.dylib                  0x00000001028a4ab4 call_function + 440
28  libpython3.9.dylib                  0x00000001028a1b8c _PyEval_EvalFrameDefault + 22436
29  libpython3.9.dylib                  0x00000001028a58a8 _PyEval_EvalCode + 2680
30  libpython3.9.dylib                  0x00000001027cfb04 _PyFunction_Vectorcall + 236
31  libpython3.9.dylib                  0x00000001027d1e30 method_vectorcall + 176
32  libpython3.9.dylib                  0x00000001028a4ab4 call_function + 440
33  libpython3.9.dylib                  0x00000001028a1ca0 _PyEval_EvalFrameDefault + 22712
34  libpython3.9.dylib                  0x00000001028a58a8 _PyEval_EvalCode + 2680
35  libpython3.9.dylib                  0x00000001027cfb04 _PyFunction_Vectorcall + 236
36  libpython3.9.dylib                  0x00000001028a4ab4 call_function + 440
37  libpython3.9.dylib                  0x00000001028a1b8c _PyEval_EvalFrameDefault + 22436
38  libpython3.9.dylib                  0x00000001028a58a8 _PyEval_EvalCode + 2680
39  libpython3.9.dylib                  0x000000010289c320 PyEval_EvalCode + 80
40  libpython3.9.dylib                  0x00000001028e1ddc run_mod + 180
41  libpython3.9.dylib                  0x00000001028e1fb0 pyrun_file + 180
42  libpython3.9.dylib                  0x00000001028dffec PyRun_SimpleFileExFlags + 836
43  libpython3.9.dylib                  0x00000001028fdba8 Py_RunMain + 1620
44  libpython3.9.dylib                  0x00000001028fe074 pymain_main + 324
45  libpython3.9.dylib                  0x00000001028fe124 Py_BytesMain + 56
46  dyld                                0x00000001a00cbf28 start + 2236
Abort trap: 6

pyarrow 16.1.0

/Users/runner/work/crossbow/crossbow/arrow/cpp/src/arrow/array/array_nested.cc:848:  Check failed: _s.ok() Operation failed: ValidateChildData(data->child_data)
Bad status: Invalid: Map array keys array should have no nulls
Abort trap: 6