Thinc 8.1.0 introduced breaking changes around tensor initialization that have not yet been ported to spacy-experimental's biaffine parser. As a result, the parser fails during training on a GPU, because new tensors are initialized on the CPU rather than on the device that holds the model's other tensors.
How to reproduce the behaviour
The error can be reproduced by running all the commands listed in this document
The error message:
Traceback (most recent call last):
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/bin/spacy", line 8, in <module>
sys.exit(setup_cli())
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/spacy/cli/_util.py", line 71, in setup_cli
command(prog_name=COMMAND)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/typer/main.py", line 497, in wrapper
return callback(**use_params) # type: ignore
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/spacy/cli/train.py", line 45, in train_cli
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/spacy/cli/train.py", line 72, in train
nlp = init_nlp(config, use_gpu=use_gpu)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/spacy/training/initialize.py", line 84, in init_nlp
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/spacy/language.py", line 1317, in initialize
proc.initialize(get_examples, nlp=self, **p_settings)
File "spacy_experimental/biaffine_parser/arc_predicter.pyx", line 124, in spacy_experimental.biaffine_parser.arc_predicter.ArcPredicter.initialize
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/thinc/model.py", line 299, in initialize
self.init(self, X=X, Y=Y)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/thinc/layers/chain.py", line 92, in init
curr_input = layer.predict(curr_input)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/thinc/model.py", line 315, in predict
return self._func(self, X, is_train=False)[0]
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/spacy_experimental/biaffine_parser/pairwise_bilinear.py", line 98, in pairswise_bilinear_forward
return model.layers[0](X, is_train)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/thinc/layers/pytorchwrapper.py", line 143, in forward
Ytorch, torch_backprop = model.shims[0](Xtorch, is_train)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/thinc/shims/pytorch.py", line 72, in __call__
return self.predict(inputs), lambda a: ...
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/thinc/shims/pytorch.py", line 90, in predict
outputs = self._model(*inputs.args, **inputs.kwargs)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/a100/gorosz/spacy-hungarian-models/hu_core_news_trf_xl/.venv/lib/python3.8/site-packages/spacy_experimental/biaffine_parser/pytorch_pairwise_bilinear.py", line 103, in forward
token_mask = torch.arange(max_seq_len).unsqueeze(0) < seq_lens.unsqueeze(1)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:3 and cpu!
Your Environment
Operating System: Ubuntu 20.04.5 LTS
Python Version Used: 3.8.5
spaCy Version Used: 3.4.1
Environment Information: virtualenv managed by poetry
Description
Thinc 8.1.0 introduced breaking changes around tensor initialization that have not yet been ported to spacy-experimental's biaffine parser. As a result, the parser fails during training on a GPU, because new tensors are initialized on the CPU rather than on the device that holds the model's other tensors.
How to reproduce the behaviour
The error can be reproduced by running all the commands listed in this document
The error message:
Your Environment