nlp-uoregon / trankit

Trankit is a Light-Weight Transformer-based Python Toolkit for Multilingual Natural Language Processing
Apache License 2.0

Training data with annotation scheme other than the UD one #79

Open · gcelano opened 7 months ago

gcelano commented 7 months ago

Is it possible to train on non-UD data (CoNLL-U format, but with different POS and syntactic labels)? At the end of the first epoch, I get the following error:

Posdep tagger: Epoch: 0
Train 0:  98%|████████████████████████▍| 2478/2533 [07:03<00:09,  5.54it/s]../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [22,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [27,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [28,0,0] Assertion `t >= 0 && t < n_classes` failed.
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[2], line 1
----> 1 trainer2.train()

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/trankit/tpipeline.py:680, in TPipeline.train(self)
    678     self._train_mwt()
    679 elif self._task == 'posdep':
--> 680     self._train_posdep()
    681 elif self._task == 'lemmatize':
    682     self._train_lemma()

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/trankit/tpipeline.py:485, in TPipeline._train_posdep(self)
    483 progress.update(1)
    484 word_reprs, cls_reprs = self._embedding_layers.get_tagger_inputs(batch)
--> 485 loss = self._tagger(batch, word_reprs, cls_reprs)
    486 loss.backward()
    488 torch.nn.utils.clip_grad_norm_([p for n, p in self.model_parameters], self._config.grad_clipping)

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/trankit/models/classifiers.py:146, in PosDepClassifier.forward(self, batch, word_reprs, cls_reprs)
    141 deprel_scores = torch.gather(deprel_scores, 2,
    142                              batch.head_idxs.unsqueeze(2).unsqueeze(3).expand(-1, -1, -1, len(
    143                                  self.vocabs[DEPREL]))).view(
    144     -1, len(self.vocabs[DEPREL]))
    145 deprel_target = batch.deprel_idxs.masked_fill(batch.word_mask[:, 1:], -100)
--> 146 loss += self.criteria(deprel_scores.contiguous(), deprel_target.view(-1))
    148 return loss

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/torch/nn/modules/loss.py:1179, in CrossEntropyLoss.forward(self, input, target)
   1178 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1179     return F.cross_entropy(input, target, weight=self.weight,
   1180                            ignore_index=self.ignore_index, reduction=self.reduction,
   1181                            label_smoothing=self.label_smoothing)

File ~/miniconda3/envs/trankit/lib/python3.9/site-packages/torch/nn/functional.py:3053, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
   3051 if size_average is not None or reduce is not None:
   3052     reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3053 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
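The failing assertion `t >= 0 && t < n_classes` in the NLL-loss kernel is the usual symptom of a target label index that falls outside the classifier's output dimension, i.e. a POS or DEPREL tag in the training data that is not in the vocabulary the tagger was built with. Below is a minimal sketch, independent of trankit, that reproduces the same failure mode on CPU (where PyTorch raises a readable IndexError instead of a device-side assert), followed by a hypothetical sanity check that tallies the DEPREL values (column 8) of a CoNLL-U file so they can be compared against the label set the model expects; the path and sizes are illustrative:

```python
import torch
import torch.nn.functional as F
from collections import Counter

# 1) Reproduce the failure mode: a target id >= n_classes is exactly
#    what the CUDA kernel asserts against (`t >= 0 && t < n_classes`).
n_classes = 5                          # e.g. size of the DEPREL vocab
logits = torch.randn(3, n_classes)     # scores for 3 tokens
targets = torch.tensor([0, 2, 7])      # 7 is not a valid class id
try:
    F.cross_entropy(logits, targets)
except IndexError as e:
    print(e)                           # "Target 7 is out of bounds."

# 2) Hypothetical sanity check: count every DEPREL value in the
#    training file so the set can be diffed against the tagger's vocab.
def deprel_counts(conllu_path):
    counts = Counter()
    with open(conllu_path, encoding="utf-8") as f:
        for line in f:
            line = line.rstrip("\n")
            if not line or line.startswith("#"):
                continue
            cols = line.split("\t")
            # plain word lines only: skip MWT ranges ("1-2") and
            # empty nodes ("1.1"), whose first column is non-numeric
            if len(cols) == 10 and cols[0].isdigit():
                counts[cols[7]] += 1
    return counts

print(deprel_counts("train.conllu").most_common())  # illustrative path
```

Running the same training on CPU, or with `CUDA_LAUNCH_BLOCKING=1` as the error message suggests, typically surfaces the offending label directly.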