felixdittrich92 / OnnxTR

OnnxTR a docTR (Document Text Recognition) library Onnx pipeline wrapper - for seamless, high-performing & accessible OCR
https://github.com/mindee/doctr
Apache License 2.0

Error While Loading the models locally #31

Closed · saisoulpage closed 2 months ago

saisoulpage commented 2 months ago

Bug description

Hi @felixdittrich92, I am getting an error while loading the models. I have attached the code below, please check it.

Code snippet to reproduce the bug

import cv2
import numpy as np

from onnxtr.models import ocr_predictor, parseq, linknet_resnet18, EngineConfig

# read_image_as_bytes is a user-defined helper returning the raw image bytes
image_path_data = read_image_as_bytes(image_path)
doc = cv2.imdecode(np.frombuffer(image_path_data, np.uint8), cv2.IMREAD_COLOR)
doc = [cv2.cvtColor(doc, cv2.COLOR_BGR2RGB)]

print("==================================================", doc[0].shape, len(doc))
# doc = DocumentFile.from_images(a)
reco_model = parseq("crnn_vgg16_bn-662979cc.onnx", vocab="ABC")
det_model = linknet_resnet18("rep_fast_base-1b89ebf9.onnx")

model = ocr_predictor(
    det_arch=det_model,  # detection architecture
    reco_arch=reco_model,  # recognition architecture
    det_bs=2, # detection batch size
    reco_bs=512, # recognition batch size
    assume_straight_pages=True,  # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
    straighten_pages=False,  # set to `True` if the pages should be straightened before final processing (default: False)
    # Preprocessing related parameters
    preserve_aspect_ratio=True,  # set to `False` if the aspect ratio should not be preserved (default: True)
    symmetric_pad=True,  # set to `False` to disable symmetric padding (default: True)
    # Additional parameters - meta information
    detect_orientation=False,  # set to `True` if the orientation of the pages should be detected (default: False)
    detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
    # DocumentBuilder specific parameters
    resolve_lines=True,  # whether words should be automatically grouped into lines (default: True)
    resolve_blocks=False,  # whether lines should be automatically grouped into blocks (default: False)
    paragraph_break=0.035,  # relative length of the minimum space separating paragraphs (default: 0.035)
    # OnnxTR specific parameters
    # NOTE: 8-Bit quantized models are not available for FAST detection models and can in general lead to poorer accuracy
    load_in_8_bit=False,  # set to `True` to load 8-bit quantized models instead of the full precision ones (default: False)
    # Advanced engine configuration options
    det_engine_cfg=EngineConfig(),  # detection model engine configuration (default: internal predefined configuration)
    reco_engine_cfg=EngineConfig(),  # recognition model engine configuration (default: internal predefined configuration)
    clf_engine_cfg=EngineConfig(),  # classification model engine configuration (default: internal predefined configuration)
)

res = model(doc)
json_res = res.export()

# Decode the image (only for visualization purposes)
image = cv2.imread(image_path)

words_to_mask_main = []
confs_main = []
polyline_main = []

for page in json_res["pages"]:
  page_idx = page["page_idx"]  # The index of the page
  shape = page["dimensions"]  # The shape of the page (height, width)
  # Dict with the orientation of the page (angle in degrees, confidence)
  # (if detect_orientation is True and/or assume_straight_pages is False)
  orientation = page["orientation"]
  language = page["language"]  # The detected language of the page (if detect_language is True)

  for block in page["blocks"]:
      block_geom = _to_absolute(block["geometry"], shape)  # The geom of the block (now absolute coordinates)
      # The average objectness score of the block (over lines in the block)
      block_objectness_score = block["objectness_score"]
      # draw block on image
      cv2.polylines(image, [np.array(block_geom).reshape(-1, 1, 2)], True, (0, 255, 0), 2)
      for line in block["lines"]:
          line_geom = _to_absolute(line["geometry"], shape)  # The geom of the line (now absolute coordinates)
          # The average objectness score of the line (over words in the line)
          line_objectness_score = line["objectness_score"]
          # draw line on image
          cv2.polylines(image, [np.array(line_geom).reshape(-1, 1, 2)], True, (0, 0, 255), 2)

          words_to_mask = []
          confs = []
          polylines = []
          count = 0
          for word in line['words']:
            print("words", word)
            # check the word's confidence and collect very low-confidence words
            if word["confidence"] < 0.85:
              print(word['value'], word["confidence"])
              words_to_mask.append(word['value'])
              confs.append(word["confidence"])
              count += 1
              if count < 2:
                polylines.append(np.array(line_geom).reshape(-1, 1, 2))

          try:
            words_to_mask_main.append("".join(words_to_mask))
            confs_main.append(min(confs))
            polyline_main.append(polylines[0])
          except (ValueError, IndexError):
            # min() or polylines[0] fails when no low-confidence words were collected for this line
            pass

return polyline_main

Error traceback

---------------------------------------------------------------------------

IndexError                                Traceback (most recent call last)

<ipython-input-48-b9b6d2e84fd6> in <cell line: 1>()
      1 if __name__ == "__main__":
      2     input_path = "/content/h2.jpeg"
----> 3     main(input_path)

9 frames

/usr/local/lib/python3.10/dist-packages/onnxtr/models/recognition/models/parseq.py in <genexpr>(.0)
     92 
     93         word_values = [
---> 94             "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
     95         ]
     96         # compute probabilties for each word up to the EOS token

IndexError: list index out of range

Environment

I am using Colab with the default Python version.

felixdittrich92 commented 2 months ago

Hi @saisoulpage :wave:,

First, you are loading the ONNX files of different models: the parseq architecture with the crnn_vgg16_bn ONNX model, and the same for detection, linknet_resnet18 with the fast_base ONNX model:

reco_model = parseq("crnn_vgg16_bn-662979cc.onnx", vocab="ABC")  # wrong onnx model
det_model = linknet_resnet18("rep_fast_base-1b89ebf9.onnx")  # wrong onnx model

And you cannot simply change the vocab; if you only want the model to predict A, B or C, this would require your own custom-trained model :)

reco_model = parseq("crnn_vgg16_bn-662979cc.onnx", vocab="ABC")  # works only with custom model

Custom model training ref.: https://mindee.github.io/doctr/using_doctr/custom_models_training.html
Custom model export ref.: https://mindee.github.io/doctr/using_doctr/using_model_export.html#export-to-onnx

saisoulpage commented 2 months ago

I need to load the pretrained models you are using manually, from a local models directory. Could you please help me with that?

felixdittrich92 commented 2 months ago

How to fix ?

wget https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/parseq-00b40714.onnx
wget https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet18-e0e0b9dc.onnx
reco_model = parseq("<PATH_TO>/parseq-00b40714.onnx")
det_model = linknet_resnet18("<PATH_TO>/linknet_resnet18-e0e0b9dc.onnx")

# Rest of your code

:)

felixdittrich92 commented 2 months ago

BTW: You can set the environment variable ONNXTR_CACHE_DIR to any folder you want, and the models will be downloaded into this location instead of the default ~/.cache/onnxtr/models.

saisoulpage commented 2 months ago

https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_base-1b89ebf9.onnx
https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_vgg16_bn-662979cc.onnx

How do I load these models locally?


felixdittrich92 commented 2 months ago
import os
os.environ["ONNXTR_CACHE_DIR"] = "<full_path_to_folder>"

Put this at the top of your script :) All downloaded models will then be saved in the given location instead of the default one.

saisoulpage commented 2 months ago

Sorted it

saisoulpage commented 2 months ago


I am asking because parseq and linknet_resnet18 do not work with the rep_fast_base and crnn_vgg16_bn models linked above.

felixdittrich92 commented 2 months ago


If you want to use the crnn_vgg16_bn and fast_base ONNX models (from your initial code snippet), you need:

from onnxtr.models import crnn_vgg16_bn, fast_base

And remove the vocab="ABC" argument :)
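
For reference, a minimal sketch of what that could look like, based on the file names from the initial snippet (adjust <PATH_TO> to wherever the downloaded .onnx files live):

from onnxtr.models import crnn_vgg16_bn, fast_base, ocr_predictor

# load each ONNX file with its matching architecture (default vocab, no vocab="ABC")
reco_model = crnn_vgg16_bn("<PATH_TO>/crnn_vgg16_bn-662979cc.onnx")
det_model = fast_base("<PATH_TO>/rep_fast_base-1b89ebf9.onnx")

model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)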

saisoulpage commented 2 months ago

How many different languages do these models support, e.g. English, etc.?

One more question: I am still getting the error below.

  |     model = ocr_predictor(det_model=det_model, reco_model=reco_model,
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/zoo.py", line 121, in ocr_predictor
api_1             |     return _predictor(
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/zoo.py", line 34, in _predictor
api_1             |     det_predictor = detection_predictor(
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/detection/zoo.py", line 86, in detection_predictor
api_1             |     return _predictor(arch, assume_straight_pages, load_in_8_bit, engine_cfg=engine_cfg, **kwargs)
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/detection/zoo.py", line 39, in _predictor
api_1             |     _model = detection.__dict__[arch](
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/detection/models/fast.py", line 189, in fast_base
api_1             |     return _fast("fast_base", model_path, load_in_8_bit, engine_cfg, **kwargs)
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/detection/models/fast.py", line 102, in _fast
api_1             |     return FAST(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/detection/models/fast.py", line 64, in __init__
api_1             |     super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxtr/models/engine.py", line 97, in __init__
api_1             |     self.runtime = InferenceSession(archive_path, providers=self.providers, sess_options=self.session_options)
api_1             |   File "/usr/local/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 349, in __init__
api_1             |     raise TypeError("Unable to load from type '{0}'".format(type(path_or_bytes)))
api_1             | TypeError: Unable to load from type '<class 'pathlib.PosixPath'>'

felixdittrich92 commented 2 months ago

Could you share a full code snippet which raises the error? The default recognition models are trained on the French vocab, but you could also try a multilingual model (https://huggingface.co/Felix92/onnxtr-parseq-multilingual-v1) - currently only available via the Hugging Face Hub.
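
A guess at the TypeError above (an assumption, not confirmed in this thread): the model path seems to be passed as a pathlib.PosixPath, while the installed onnxruntime version only accepts a plain string or bytes for InferenceSession. Converting the path to a string before handing it to the model constructor might already avoid it, roughly:

from pathlib import Path
from onnxtr.models import crnn_vgg16_bn, fast_base

models_dir = Path("/models")  # hypothetical local models directory

# pass str(...) instead of a PosixPath to the model constructors
det_model = fast_base(str(models_dir / "rep_fast_base-1b89ebf9.onnx"))
reco_model = crnn_vgg16_bn(str(models_dir / "crnn_vgg16_bn-662979cc.onnx"))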