conjuncts / gmft

Lightweight, performant, deep table extraction
MIT License
350 stars 23 forks source link

Error running QuickStart example on Google Colab #35

Open tranhoangnguyen03 opened 2 weeks ago

tranhoangnguyen03 commented 2 weeks ago
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
[<ipython-input-14-56e8e2110a5a>](https://localhost:8080/#) in <cell line: 5>()
      3 from gmft.pdf_bindings import PyPDFium2Document
      4 
----> 5 detector = AutoTableDetector()
      6 formatter = AutoTableFormatter()
      7 

11 frames
[/usr/local/lib/python3.10/dist-packages/gmft/detectors/tatr.py](https://localhost:8080/#) in __init__(self, config, default_implementation)
     80 
     81         revision = "no_timm" if config.no_timm else None
---> 82         self.detector = TableTransformerForObjectDetection.from_pretrained(config.detector_path, revision=revision).to(config.torch_device)
     83 
     84         if not config.warn_uninitialized_weights:

[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
   2249 
   2250         if old_num_tokens == new_num_tokens and not is_deepspeed_zero3_enabled():
-> 2251             return old_embeddings
   2252 
   2253         if not isinstance(old_embeddings, nn.Embedding):

[/usr/local/lib/python3.10/dist-packages/transformers/configuration_utils.py](https://localhost:8080/#) in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    552         return cls.from_dict(config_dict, **kwargs)
    553 
--> 554     @classmethod
    555     def get_config_dict(
    556         cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs

[/usr/local/lib/python3.10/dist-packages/transformers/configuration_utils.py](https://localhost:8080/#) in from_dict(cls, config_dict, **kwargs)
    723         # Update config with kwargs if needed
    724         if "num_labels" in kwargs and "id2label" in kwargs:
--> 725             num_labels = kwargs["num_labels"]
    726             id2label = kwargs["id2label"] if kwargs["id2label"] is not None else []
    727             if len(id2label) != num_labels:

[/usr/local/lib/python3.10/dist-packages/transformers/configuration_utils.py](https://localhost:8080/#) in __repr__(self)
    755         Instantiates a [`PretrainedConfig`] from the path to a JSON file of parameters.
    756 
--> 757         Args:
    758             json_file (`str` or `os.PathLike`):
    759                 Path to the JSON file containing the parameters.

[/usr/local/lib/python3.10/dist-packages/transformers/configuration_utils.py](https://localhost:8080/#) in to_json_string(self, use_diff)
    834         self.dict_torch_dtype_to_str(serializable_config_dict)
    835 
--> 836         if "_attn_implementation_internal" in serializable_config_dict:
    837             del serializable_config_dict["_attn_implementation_internal"]
    838 

[/usr/lib/python3.10/json/__init__.py](https://localhost:8080/#) in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    236         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    237         separators=separators, default=default, sort_keys=sort_keys,
--> 238         **kw).encode(obj)
    239 
    240 

[/usr/lib/python3.10/json/encoder.py](https://localhost:8080/#) in encode(self, o)
    199         chunks = self.iterencode(o, _one_shot=True)
    200         if not isinstance(chunks, (list, tuple)):
--> 201             chunks = list(chunks)
    202         return ''.join(chunks)
    203 

[/usr/lib/python3.10/json/encoder.py](https://localhost:8080/#) in _iterencode(o, _current_indent_level)
    429             yield from _iterencode_list(o, _current_indent_level)
    430         elif isinstance(o, dict):
--> 431             yield from _iterencode_dict(o, _current_indent_level)
    432         else:
    433             if markers is not None:

[/usr/lib/python3.10/json/encoder.py](https://localhost:8080/#) in _iterencode_dict(dct, _current_indent_level)
    403                 else:
    404                     chunks = _iterencode(value, _current_indent_level)
--> 405                 yield from chunks
    406         if newline_indent is not None:
    407             _current_indent_level -= 1

[/usr/lib/python3.10/json/encoder.py](https://localhost:8080/#) in _iterencode(o, _current_indent_level)
    436                     raise ValueError("Circular reference detected")
    437                 markers[markerid] = o
--> 438             o = _default(o)
    439             yield from _iterencode(o, _current_indent_level)
    440             if markers is not None:

[/usr/lib/python3.10/json/encoder.py](https://localhost:8080/#) in default(self, o)
    177 
    178         """
--> 179         raise TypeError(f'Object of type {o.__class__.__name__} '
    180                         f'is not JSON serializable')
    181 

TypeError: Object of type ResNetConfig is not JSON serializable

conjuncts commented 2 weeks ago

Hmm, the quickstart worked fine for me when I ran it. What do the contents of your notebook look like?

Does the issue persist with this notebook?

https://colab.research.google.com/drive/1moLo8uhQiUe8Bz7gdkxI21zbSJ0QzluK