Hello, I'm getting the following error while trying to train a retriever for the RCR task, and I need some help resolving it.
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/tevatron/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/ubuntu/anaconda3/envs/tevatron/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/__main__.py", line 39, in <module>
cli.main()
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 430, in main
run()
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 284, in run_file
runpy.run_path(target, run_name="__main__")
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 321, in run_path
return _run_module_code(code, init_globals, run_name,
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 135, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 124, in _run_code
exec(code, run_globals)
File "/home/ubuntu/Text2Proc/tevatron/src/tevatron/driver/train.py", line 119, in <module>
main()
File "/home/ubuntu/Text2Proc/tevatron/src/tevatron/driver/train.py", line 80, in main
hf_dataset = HFTrainDataset(tokenizer=tokenizer, data_args=data_args,
File "/home/ubuntu/Text2Proc/tevatron/src/tevatron/datasets/dataset.py", line 28, in __init__
self.dataset = load_dataset(data_args.dataset_name,
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/load.py", line 1694, in load_dataset
builder_instance.download_and_prepare(
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/builder.py", line 595, in download_and_prepare
self._download_and_prepare(
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/builder.py", line 683, in _download_and_prepare
self._prepare_split(split_generator, **prepare_split_kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/builder.py", line 1138, in _prepare_split
writer.write_table(table)
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/arrow_writer.py", line 473, in write_table
pa_table = pa.Table.from_arrays([pa_table[name] for name in self._schema.names], schema=self._schema)
File "pyarrow/table.pxi", line 3862, in pyarrow.lib.Table.from_arrays
File "pyarrow/table.pxi", line 1450, in pyarrow.lib._sanitize_arrays
File "pyarrow/array.pxi", line 354, in pyarrow.lib.asarray
File "pyarrow/table.pxi", line 552, in pyarrow.lib.ChunkedArray.cast
File "/home/ubuntu/.local/lib/python3.8/site-packages/pyarrow/compute.py", line 403, in cast
return call_function("cast", [arr], options, memory_pool)
File "pyarrow/_compute.pyx", line 572, in pyarrow._compute.call_function
File "pyarrow/_compute.pyx", line 367, in pyarrow._compute.Function.call
File "pyarrow/error.pxi", line 144, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow/error.pxi", line 121, in pyarrow.lib.check_status
pyarrow.lib.ArrowNotImplementedError: Unsupported cast from struct<docid: string, title: string, text: string> to null using function cast_null
Hello, I'm getting the following error while trying to train a retriever for the RCR task, and I need some help resolving it.
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/tevatron/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/ubuntu/anaconda3/envs/tevatron/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/__main__.py", line 39, in <module>
cli.main()
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 430, in main
run()
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 284, in run_file
runpy.run_path(target, run_name="__main__")
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 321, in run_path
return _run_module_code(code, init_globals, run_name,
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 135, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 124, in _run_code
exec(code, run_globals)
File "/home/ubuntu/Text2Proc/tevatron/src/tevatron/driver/train.py", line 119, in <module>
main()
File "/home/ubuntu/Text2Proc/tevatron/src/tevatron/driver/train.py", line 80, in main
hf_dataset = HFTrainDataset(tokenizer=tokenizer, data_args=data_args,
File "/home/ubuntu/Text2Proc/tevatron/src/tevatron/datasets/dataset.py", line 28, in __init__
self.dataset = load_dataset(data_args.dataset_name,
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/load.py", line 1694, in load_dataset
builder_instance.download_and_prepare(
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/builder.py", line 595, in download_and_prepare
self._download_and_prepare(
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/builder.py", line 683, in _download_and_prepare
self._prepare_split(split_generator, **prepare_split_kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/builder.py", line 1138, in _prepare_split
writer.write_table(table)
File "/home/ubuntu/.local/lib/python3.8/site-packages/datasets/arrow_writer.py", line 473, in write_table
pa_table = pa.Table.from_arrays([pa_table[name] for name in self._schema.names], schema=self._schema)
File "pyarrow/table.pxi", line 3862, in pyarrow.lib.Table.from_arrays
File "pyarrow/table.pxi", line 1450, in pyarrow.lib._sanitize_arrays
File "pyarrow/array.pxi", line 354, in pyarrow.lib.asarray
File "pyarrow/table.pxi", line 552, in pyarrow.lib.ChunkedArray.cast
File "/home/ubuntu/.local/lib/python3.8/site-packages/pyarrow/compute.py", line 403, in cast
return call_function("cast", [arr], options, memory_pool)
File "pyarrow/_compute.pyx", line 572, in pyarrow._compute.call_function
File "pyarrow/_compute.pyx", line 367, in pyarrow._compute.Function.call
File "pyarrow/error.pxi", line 144, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow/error.pxi", line 121, in pyarrow.lib.check_status
pyarrow.lib.ArrowNotImplementedError: Unsupported cast from struct<docid: string, title: string, text: string> to null using function cast_null