Closed · Aldo-Aditiya · 1 month ago
Running the `convert_single_pdf` function, I got the error below.
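For reference, I'm calling it roughly like this (a minimal sketch following the marker README; the PDF path and `langs` value are placeholders, and the return unpacking may differ slightly by marker version):

```python
from marker.convert import convert_single_pdf
from marker.models import load_all_models

# Load the detection/layout/recognition models once and reuse them
model_lst = load_all_models()

# "document.pdf" and the langs list are placeholders, not my actual inputs
full_text, images, out_meta = convert_single_pdf(
    "document.pdf",
    model_lst,
    langs=["English"],
)
```

The full traceback: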
```
File ...lib/python3.9/site-packages/marker/convert.py:90, in convert_single_pdf(fname, model_lst, max_pages, start_page, metadata, langs, batch_multiplier)
     87 flush_cuda_memory()
     89 # OCR pages as needed
---> 90 pages, ocr_stats = run_ocr(doc, pages, langs, ocr_model, batch_multiplier=batch_multiplier)
     91 flush_cuda_memory()
     93 out_meta["ocr_stats"] = ocr_stats

File ...lib/python3.9/site-packages/marker/ocr/recognition.py:51, in run_ocr(doc, pages, langs, rec_model, batch_multiplier)
     49     return pages, {"ocr_pages": 0, "ocr_failed": 0, "ocr_success": 0, "ocr_engine": "none"}
     50 elif ocr_method == "surya":
---> 51     new_pages = surya_recognition(doc, ocr_idxs, langs, rec_model, pages, batch_multiplier=batch_multiplier)
     52 elif ocr_method == "ocrmypdf":
     53     new_pages = tesseract_recognition(doc, ocr_idxs, langs)

File ...lib/python3.9/site-packages/marker/ocr/recognition.py:76, in surya_recognition(doc, page_idxs, langs, rec_model, pages, batch_multiplier)
     73 detection_results = [p.text_lines.bboxes for p in selected_pages]
     74 polygons = [[b.polygon for b in bboxes] for bboxes in detection_results]
---> 76 results = run_recognition(images, surya_langs, rec_model, processor, polygons=polygons, batch_size=int(get_batch_size() * batch_multiplier))
     78 new_pages = []
     79 for (page_idx, result, old_page) in zip(page_idxs, results, selected_pages):

File ...lib/python3.9/site-packages/surya/ocr.py:30, in run_recognition(images, langs, rec_model, rec_processor, bboxes, polygons, batch_size)
     27 all_slices.extend(slices)
     28 all_langs.extend([lang] * len(slices))
---> 30 rec_predictions, _ = batch_recognition(all_slices, all_langs, rec_model, rec_processor, batch_size=batch_size)
     32 predictions_by_image = []
     33 slice_start = 0

File ...lib/python3.9/site-packages/surya/recognition.py:138, in batch_recognition(images, languages, model, processor, batch_size)
    136 while token_count < settings.RECOGNITION_MAX_TOKENS:
    137     is_prefill = token_count == 0
--> 138     return_dict = model(
    139         decoder_input_ids=batch_decoder_input,
    140         decoder_attention_mask=attention_mask,
    141         decoder_self_kv_cache=None if is_prefill else decoder_cache,
    142         decoder_cross_kv_cache=None if is_prefill else encoder_cache,
    143         decoder_past_token_count=token_count,
    144         decoder_langs=batch_langs,
    145         pixel_values=batch_pixel_values,
    146         encoder_outputs=encoder_outputs,
    147         return_dict=True,
    148     )
    150     logits = return_dict["logits"][:current_batch_size]  # Ignore batch padding
    151     preds = torch.argmax(logits[:, -1], dim=-1)

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ...lib/python3.9/site-packages/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py:615, in VisionEncoderDecoderModel.forward(self, pixel_values, decoder_input_ids, decoder_attention_mask, encoder_outputs, past_key_values, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, **kwargs)
    610     decoder_input_ids = shift_tokens_right(
    611         labels, self.config.pad_token_id, self.config.decoder_start_token_id
    612     )
    614 # Decode
--> 615 decoder_outputs = self.decoder(
    616     input_ids=decoder_input_ids,
    617     attention_mask=decoder_attention_mask,
    618     encoder_hidden_states=encoder_hidden_states,
    619     encoder_attention_mask=encoder_attention_mask,
    620     inputs_embeds=decoder_inputs_embeds,
    621     output_attentions=output_attentions,
    622     output_hidden_states=output_hidden_states,
    623     use_cache=use_cache,
    624     past_key_values=past_key_values,
    625     return_dict=return_dict,
    626     **kwargs_decoder,
    627 )
    629 # Compute loss independent from decoder (as some shift the logits inside them)
    630 loss = None

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ...lib/python3.9/site-packages/surya/model/recognition/decoder.py:474, in MBartMoE.forward(self, input_ids, attention_mask, self_kv_cache, cross_kv_cache, past_token_count, langs, encoder_hidden_states, encoder_attention_mask, head_mask, cross_attn_head_mask, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
    471 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
    473 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
--> 474 outputs = self.model.decoder(
    475     input_ids=input_ids,
    476     attention_mask=attention_mask,
    477     self_kv_cache=self_kv_cache,
    478     cross_kv_cache=cross_kv_cache,
    479     past_token_count=past_token_count,
    480     langs=langs,
    481     encoder_hidden_states=encoder_hidden_states,
    482 )
    484 logits = self.lm_head(outputs[0])
    486 if not return_dict:

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ...lib/python3.9/site-packages/surya/model/recognition/decoder.py:387, in MBartMoEDecoder.forward(self, input_ids, attention_mask, self_kv_cache, cross_kv_cache, past_token_count, langs, encoder_hidden_states)
    385 layer_self_kv_cache = self_kv_cache[idx] if self_kv_cache is not None else None
    386 layer_cross_kv_cache = cross_kv_cache[idx] if cross_kv_cache is not None else None
--> 387 layer_outputs = decoder_layer(
    388     hidden_states,
    389     attention_mask=attention_mask,
    390     langs=langs,
    391     self_kv_cache=layer_self_kv_cache,
    392     cross_kv_cache=layer_cross_kv_cache,
    393     is_prefill=is_prefill,
    394     encoder_hidden_states=encoder_hidden_states,
    395     encoder_attention_mask=None,
    396     use_cache=use_cache,
    397 )
    398 hidden_states = layer_outputs[0]
    400 if use_cache:

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ...lib/python3.9/site-packages/surya/model/recognition/decoder.py:308, in MBartMoEDecoderLayer.forward(self, hidden_states, attention_mask, langs, self_kv_cache, cross_kv_cache, is_prefill, encoder_hidden_states, encoder_attention_mask, use_cache)
    306 hidden_states = self.final_layer_norm(hidden_states)
    307 if self.has_moe:
--> 308     hidden_states = self.moe(hidden_states, langs)
    309 else:
    310     hidden_states = self.activation_fn(self.fc1(hidden_states))

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ...lib/python3.9/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ...lib/python3.9/site-packages/surya/model/recognition/decoder.py:99, in MBartExpertLayer.forward(self, hidden_states, langs)
     96 if idx.shape[0] == 0:
     97     continue
---> 99 expert_layer = self.experts[str(expert_lang.item())]
    101 current_state = hidden_states[idx]
    102 current_hidden_states = expert_layer(current_state.view(-1, hidden_dim))

File ...lib/python3.9/site-packages/torch/nn/modules/container.py:461, in ModuleDict.__getitem__(self, key)
    459 @_copy_to_script_wrapper
    460 def __getitem__(self, key: str) -> Module:
--> 461     return self._modules[key]

KeyError: '65555'
```
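For anyone landing here: the last two frames show where this dies. surya's `MBartExpertLayer` routes hidden states to a per-language expert by indexing an `nn.ModuleDict` with the language token ID as a string, and nothing is registered under `'65555'`. A minimal sketch of that failure mode (the registered keys below are hypothetical; the real ones come from surya's language tokens):

```python
import torch.nn as nn

# Hypothetical expert table keyed by language-token-ID strings, mirroring
# surya's `self.experts[str(expert_lang.item())]` lookup in MBartExpertLayer.
experts = nn.ModuleDict({
    "65539": nn.Linear(8, 8),
    "65540": nn.Linear(8, 8),
})

lang_token_id = 65555  # a language token with no registered expert
try:
    expert_layer = experts[str(lang_token_id)]
except KeyError as err:
    print(f"KeyError: {err}")  # prints KeyError: '65555', as in the traceback
```

If I'm reading it right, the requested language maps to a token that has no expert in the loaded recognition model (or the marker and surya versions are out of sync), so the fix belongs on the surya side.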
Closing this and moving the issue to the surya repo: https://github.com/VikParuchuri/surya/issues/128