Open ckgithub2019 opened 1 month ago
It looks like you are running the project for the first time, so it is downloading all of the models it needs, which can make it a bit slow. If that is not the case, you can try saving the models to local files and loading them from there, which is what I do.
Loading the models now takes me only about 5 seconds.
I downloaded the whole project and changed marker/models.py, which contains five functions that load the models. I modified it slightly, as follows:
import os

# For some reason, transformers decided to use .isin for a simple op, which is
# not supported on MPS. Kept ahead of the model imports, as in the original.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

from marker.postprocessors.editor import load_editing_model
from surya.model.detection.model import load_model as load_detection_model, load_processor as load_detection_processor
from texify.model.model import load_model as load_texify_model
from texify.model.processor import load_processor as load_texify_processor
from marker.settings import settings
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.ordering.model import load_model as load_order_model
from surya.model.ordering.processor import load_processor as load_order_processor
import torch

# Directory that holds the torch.save() snapshots of the loaded models.
MODEL_CACHE_DIR = "../model_para"


def _default_device():
    """Return the current CUDA device as ``"cuda:<index>"``, or ``"cpu"``."""
    if torch.cuda.is_available():
        return f"cuda:{torch.cuda.current_device()}"
    return "cpu"


def _load_cached_model(name, build_model, device):
    """Return a model, reusing its ``torch.save`` snapshot on CUDA devices.

    Args:
        name: Short model name; used for the snapshot file name
            (``<name>_model.pth``) and in log messages.
        build_model: Zero-argument callable that loads the model from its
            original source.
        device: Target device (string or ``torch.device``). The caller's
            choice is respected instead of being rewritten to a fixed GPU.

    Returns:
        The loaded model. Non-CUDA devices bypass the snapshot entirely.
    """
    if not str(device).startswith("cuda"):
        model = build_model()
        print(f"Loaded {name} model on {device}.")
        return model

    cache_path = f"{MODEL_CACHE_DIR}/{name}_model.pth"
    try:
        # The snapshot is a fully pickled module, so weights_only must be off
        # (recent PyTorch releases default it to True).
        model = torch.load(cache_path, map_location=device, weights_only=False)
    except Exception as exc:
        # Missing or unreadable snapshot: rebuild and refresh it. Catching
        # Exception (not a bare except) lets Ctrl-C and SystemExit propagate.
        print(f"No usable {name} model snapshot at {cache_path} ({exc!r}); loading from source.")
        model = build_model()
        model.to(device)
        os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
        torch.save(model, cache_path)
        print(f"Loaded {name} model from source and saved a snapshot to {cache_path}.")
    else:
        print(f"Loaded {name} model snapshot from {cache_path} onto {device}.")
    return model


def setup_detection_model(device=None, dtype=None):
    """Load the detection model and attach its processor as ``model.processor``.

    ``dtype`` is only applied when the model is built from source; a snapshot
    keeps whatever dtype it was saved with.
    """
    if device is None:
        device = _default_device()
    model = _load_cached_model(
        "detection",
        lambda: load_detection_model(device=device, dtype=dtype),
        device,
    )
    model.processor = load_detection_processor()
    return model


def setup_recognition_model(langs, device=None, dtype=None):
    """Load the recognition model for ``langs`` and attach its processor.

    ``langs`` and ``dtype`` are only applied when the model is built from
    source; a snapshot is returned as it was saved.
    """
    if device is None:
        device = _default_device()
    model = _load_cached_model(
        "recognition",
        lambda: load_recognition_model(langs=langs, device=device, dtype=dtype),
        device,
    )
    model.processor = load_recognition_processor()
    return model


def setup_texify_model(device=None, dtype=None):
    """Load the texify model (``settings.TEXIFY_MODEL_NAME``) and its processor."""
    if device is None:
        device = _default_device()
    model = _load_cached_model(
        "texify",
        lambda: load_texify_model(checkpoint=settings.TEXIFY_MODEL_NAME, device=device, dtype=dtype),
        device,
    )
    model.processor = load_texify_processor()
    return model


def setup_layout_model(device=None, dtype=None):
    """Load the layout model (``settings.LAYOUT_MODEL_CHECKPOINT``) and its processor.

    The layout model is loaded through the detection loader with the layout
    checkpoint, exactly as in the original snippet.
    """
    if device is None:
        device = _default_device()
    model = _load_cached_model(
        "layout",
        lambda: load_detection_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT, device=device, dtype=dtype),
        device,
    )
    model.processor = load_detection_processor(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
    return model


def setup_order_model(device=None, dtype=None):
    """Load the reading-order model and attach its processor."""
    if device is None:
        device = _default_device()
    model = _load_cached_model(
        "order",
        lambda: load_order_model(device=device, dtype=dtype),
        device,
    )
    model.processor = load_order_processor()
    return model


def load_all_models(langs=None, device=None, dtype=None, force_load_ocr=False):
    """Load every model and return them in a fixed order.

    Args:
        langs: Optional list of languages to prune from the recognition MoE
            model.
        device: Optional target device; requires ``dtype`` when given.
        dtype: Torch dtype passed to every loader.
        force_load_ocr: Accepted for compatibility; not used by this function.

    Returns:
        ``[texify, layout, order, edit, detection, ocr]``.

    Raises:
        AssertionError: If ``device`` is given without ``dtype``.
    """
    # Explicit raise (same type and message as the original assert) so the
    # check is not stripped when Python runs with -O.
    if device is not None and dtype is None:
        raise AssertionError("Must provide dtype if device is provided")

    detection = setup_detection_model(device, dtype)
    layout = setup_layout_model(device, dtype)
    order = setup_order_model(device, dtype)
    edit = load_editing_model(device, dtype)
    # The recognition model is always loaded here.
    ocr = setup_recognition_model(langs, device, dtype)
    texify = setup_texify_model(device, dtype)
    return [texify, layout, order, edit, detection, ocr]
My code is not very beautiful, but I believe you can understand my changes.
I achieved similar results with pickle:
def load_models(device=None, dtype=None, langs=None, pickle_path='models.pkl'):
    """Load all marker models, reusing a pickle snapshot when one exists.

    Args:
        device: Optional device. Applied to unpickled models via ``.to()`` and
            passed to the loaders when building from scratch.
        dtype: Torch dtype for the loaders; required when ``device`` is given
            and the models are built from scratch. Not applied to a snapshot.
        langs: Optional list of languages to prune from the recognition MoE
            model (only used when building from scratch).
        pickle_path: Location of the pickle snapshot to read or create.

    Returns:
        ``[texify, layout, order, edit, detection, ocr]``.

    Raises:
        AssertionError: If the models must be built and ``device`` is given
            without ``dtype``.
    """
    # Local imports: the snippet uses these modules without importing them.
    import os
    import pickle
    import time

    if os.path.exists(pickle_path):
        print(f"Loading models from pickle file: {pickle_path}")
        start_time = time.time()
        # SECURITY: unpickling executes code stored in the file. Only point
        # pickle_path at a snapshot this function wrote itself.
        with open(pickle_path, 'rb') as f:
            model_lst = pickle.load(f)
        print(f"Models loaded from pickle in {time.time() - start_time:.2f} seconds")

        # Move models to the specified device after unpickling.
        # NOTE(review): None entries are skipped in case a loader returns no
        # model (e.g. an optional one) - confirm against marker's loaders.
        if device:
            model_lst = [model if model is None else model.to(device) for model in model_lst]
        return model_lst

    # Only needed when building from scratch, so imported after the cache check.
    from marker.models import setup_recognition_model, setup_texify_model, setup_order_model, setup_layout_model, \
        setup_detection_model
    from marker.postprocessors.editor import load_editing_model

    print("Pickle file not found. Loading models from scratch and creating pickle file.")
    start_time = time.time()

    # Explicit raise (same type and message as the original assert) so the
    # check is not stripped when Python runs with -O.
    if device is not None and dtype is None:
        raise AssertionError("Must provide dtype if device is provided")

    detection = setup_detection_model(device, dtype)
    layout = setup_layout_model(device, dtype)
    order = setup_order_model(device, dtype)
    edit = load_editing_model(device, dtype)
    # The recognition model is always loaded here.
    ocr = setup_recognition_model(langs, device, dtype)
    texify = setup_texify_model(device, dtype)

    model_lst = [texify, layout, order, edit, detection, ocr]
    print(f"Models loaded from scratch in {time.time() - start_time:.2f} seconds")

    print(f"Pickling models to: {pickle_path}")
    pickle_start_time = time.time()
    with open(pickle_path, 'wb') as f:
        pickle.dump(model_lst, f)
    print(f"Models pickled in {time.time() - pickle_start_time:.2f} seconds")

    return model_lst
But using torch.jit is a little more elegant :)
Also, here is a revised version of the code from rainny-day:
import os
os.environ[
"PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # For some reason, transformers decided to use .isin for a simple op, which is not supported on MPS
from marker.postprocessors.editor import load_editing_model
from surya.model.detection.model import load_model as load_detection_model, load_processor as load_detection_processor
from texify.model.model import load_model as load_texify_model
from texify.model.processor import load_processor as load_texify_processor
from marker.settings import settings
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.ordering.model import load_model as load_order_model
from surya.model.ordering.processor import load_processor as load_order_processor
import torch
def get_default_device() -> str:
    """Return the device string to use when the caller did not pick one.

    Returns:
        ``"cuda:<index>"`` for the current CUDA device when CUDA is available,
        otherwise ``"cpu"``.
    """
    if not torch.cuda.is_available():
        return "cpu"
    return f"cuda:{torch.cuda.current_device()}"
def setup_detection_model(device=None, dtype=None):
    """Load the detection model, reusing a ``torch.save`` snapshot on CUDA.

    Args:
        device: Target device (string or ``torch.device``); defaults to
            ``get_default_device()``.
        dtype: Torch dtype passed to the loader. Only applied when the model
            is built from source; a snapshot keeps the dtype it was saved with.

    Returns:
        The model, with its processor attached as ``model.processor``.
    """
    if device is None:
        device = get_default_device()

    # str() so a torch.device object works as well as a plain string.
    if str(device).startswith("cuda"):
        cache_dir = "../model_para"
        cache_path = f"{cache_dir}/detection_model.pth"
        try:
            # The snapshot is a fully pickled module, so weights_only must be
            # off (recent PyTorch releases default it to True).
            model = torch.load(cache_path, map_location=device, weights_only=False)
        except Exception as exc:
            # Missing or unreadable snapshot: rebuild and refresh it. Catching
            # Exception (not a bare except) lets Ctrl-C propagate.
            print(f"No usable detection model snapshot at {cache_path} ({exc!r}); loading from source.")
            model = load_detection_model(device=device, dtype=dtype)
            model.to(device)
            os.makedirs(cache_dir, exist_ok=True)
            torch.save(model, cache_path)
            print(f"Loaded detection model from source and saved a snapshot to {cache_path}.")
        else:
            print(f"Loaded detection model snapshot from {cache_path} onto {device}.")
    else:
        model = load_detection_model(device=device, dtype=dtype)
        print(f"Loaded detection model on {device}.")

    model.processor = load_detection_processor()
    return model
def setup_recognition_model(langs, device=None, dtype=None):
    """Load the recognition model, reusing a ``torch.save`` snapshot on CUDA.

    Args:
        langs: Languages passed to the recognition loader. Only applied when
            the model is built from source; a snapshot is returned as saved.
        device: Target device (string or ``torch.device``); defaults to
            ``get_default_device()``.
        dtype: Torch dtype passed to the loader (build-from-source path only).

    Returns:
        The model, with its processor attached as ``model.processor``.
    """
    if device is None:
        device = get_default_device()

    # str() so a torch.device object works as well as a plain string.
    if str(device).startswith("cuda"):
        cache_dir = "../model_para"
        cache_path = f"{cache_dir}/recognition_model.pth"
        try:
            # The snapshot is a fully pickled module, so weights_only must be
            # off (recent PyTorch releases default it to True).
            model = torch.load(cache_path, map_location=device, weights_only=False)
        except Exception as exc:
            # Missing or unreadable snapshot: rebuild and refresh it. Catching
            # Exception (not a bare except) lets Ctrl-C propagate.
            print(f"No usable recognition model snapshot at {cache_path} ({exc!r}); loading from source.")
            model = load_recognition_model(langs=langs, device=device, dtype=dtype)
            model.to(device)
            # Create the directory here too, so this loader works on its own.
            os.makedirs(cache_dir, exist_ok=True)
            torch.save(model, cache_path)
            print(f"Loaded recognition model from source and saved a snapshot to {cache_path}.")
        else:
            print(f"Loaded recognition model snapshot from {cache_path} onto {device}.")
    else:
        model = load_recognition_model(langs=langs, device=device, dtype=dtype)
        print(f"Loaded recognition model on {device}.")

    model.processor = load_recognition_processor()
    return model
def setup_texify_model(device=None, dtype=None):
    """Load the texify model, reusing a ``torch.save`` snapshot on CUDA.

    The checkpoint is ``settings.TEXIFY_MODEL_NAME``.

    Args:
        device: Target device (string or ``torch.device``); defaults to
            ``get_default_device()``.
        dtype: Torch dtype passed to the loader. Only applied when the model
            is built from source; a snapshot keeps the dtype it was saved with.

    Returns:
        The model, with its processor attached as ``model.processor``.
    """
    if device is None:
        device = get_default_device()

    # str() so a torch.device object works as well as a plain string.
    if str(device).startswith("cuda"):
        cache_dir = "../model_para"
        cache_path = f"{cache_dir}/texify_model.pth"
        try:
            # The snapshot is a fully pickled module, so weights_only must be
            # off (recent PyTorch releases default it to True).
            model = torch.load(cache_path, map_location=device, weights_only=False)
        except Exception as exc:
            # Missing or unreadable snapshot: rebuild and refresh it. Catching
            # Exception (not a bare except) lets Ctrl-C propagate.
            print(f"No usable texify model snapshot at {cache_path} ({exc!r}); loading from source.")
            model = load_texify_model(checkpoint=settings.TEXIFY_MODEL_NAME, device=device, dtype=dtype)
            model.to(device)
            # Create the directory here too, so this loader works on its own.
            os.makedirs(cache_dir, exist_ok=True)
            torch.save(model, cache_path)
            print(f"Loaded texify model from source and saved a snapshot to {cache_path}.")
        else:
            print(f"Loaded texify model snapshot from {cache_path} onto {device}.")
    else:
        model = load_texify_model(checkpoint=settings.TEXIFY_MODEL_NAME, device=device, dtype=dtype)
        print(f"Loaded texify model on {device}.")

    model.processor = load_texify_processor()
    return model
def setup_layout_model(device=None, dtype=None):
    """Load the layout model, reusing a ``torch.save`` snapshot on CUDA.

    The layout model is loaded through the detection loader with
    ``settings.LAYOUT_MODEL_CHECKPOINT``; its processor uses the same checkpoint.

    Args:
        device: Target device (string or ``torch.device``); defaults to
            ``get_default_device()``.
        dtype: Torch dtype passed to the loader. Only applied when the model
            is built from source; a snapshot keeps the dtype it was saved with.

    Returns:
        The model, with its processor attached as ``model.processor``.
    """
    if device is None:
        device = get_default_device()

    # str() so a torch.device object works as well as a plain string.
    if str(device).startswith("cuda"):
        cache_dir = "../model_para"
        cache_path = f"{cache_dir}/layout_model.pth"
        try:
            # The snapshot is a fully pickled module, so weights_only must be
            # off (recent PyTorch releases default it to True).
            model = torch.load(cache_path, map_location=device, weights_only=False)
        except Exception as exc:
            # Missing or unreadable snapshot: rebuild and refresh it. Catching
            # Exception (not a bare except) lets Ctrl-C propagate.
            print(f"No usable layout model snapshot at {cache_path} ({exc!r}); loading from source.")
            model = load_detection_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT, device=device, dtype=dtype)
            model.to(device)
            # Create the directory here too, so this loader works on its own.
            os.makedirs(cache_dir, exist_ok=True)
            torch.save(model, cache_path)
            print(f"Loaded layout model from source and saved a snapshot to {cache_path}.")
        else:
            print(f"Loaded layout model snapshot from {cache_path} onto {device}.")
    else:
        model = load_detection_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT, device=device, dtype=dtype)
        print(f"Loaded layout model on {device}.")

    model.processor = load_detection_processor(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
    return model
def setup_order_model(device=None, dtype=None):
    """Load the reading-order model, reusing a ``torch.save`` snapshot on CUDA.

    Args:
        device: Target device (string or ``torch.device``); defaults to
            ``get_default_device()``.
        dtype: Torch dtype passed to the loader. Only applied when the model
            is built from source; a snapshot keeps the dtype it was saved with.

    Returns:
        The model, with its processor attached as ``model.processor``.
    """
    if device is None:
        device = get_default_device()

    # str() so a torch.device object works as well as a plain string.
    if str(device).startswith("cuda"):
        cache_dir = "../model_para"
        cache_path = f"{cache_dir}/order_model.pth"
        try:
            # The snapshot is a fully pickled module, so weights_only must be
            # off (recent PyTorch releases default it to True).
            model = torch.load(cache_path, map_location=device, weights_only=False)
        except Exception as exc:
            # Missing or unreadable snapshot: rebuild and refresh it. Catching
            # Exception (not a bare except) lets Ctrl-C propagate.
            print(f"No usable order model snapshot at {cache_path} ({exc!r}); loading from source.")
            model = load_order_model(device=device, dtype=dtype)
            model.to(device)
            # Create the directory here too, so this loader works on its own.
            os.makedirs(cache_dir, exist_ok=True)
            torch.save(model, cache_path)
            print(f"Loaded order model from source and saved a snapshot to {cache_path}.")
        else:
            print(f"Loaded order model snapshot from {cache_path} onto {device}.")
    else:
        model = load_order_model(device=device, dtype=dtype)
        print(f"Loaded order model on {device}.")

    model.processor = load_order_processor()
    return model
def load_all_models(langs=None, device=None, dtype=None, force_load_ocr=False):
    """Load every model and return them in a fixed order.

    Args:
        langs: Optional list of languages to prune from the recognition MoE
            model.
        device: Optional target device; requires ``dtype`` when given.
        dtype: Torch dtype passed to every loader.
        force_load_ocr: Accepted for compatibility; not used by this function.

    Returns:
        ``[texify, layout, order, edit, detection, ocr]``.

    Raises:
        AssertionError: If ``device`` is given without ``dtype``.
    """
    # Explicit raise (same type and message as the original assert) so the
    # check is not stripped when Python runs with -O.
    if device is not None and dtype is None:
        raise AssertionError("Must provide dtype if device is provided")

    detection = setup_detection_model(device, dtype)
    layout = setup_layout_model(device, dtype)
    order = setup_order_model(device, dtype)
    edit = load_editing_model(device, dtype)
    # The recognition model is always loaded here.
    ocr = setup_recognition_model(langs, device, dtype)
    texify = setup_texify_model(device, dtype)
    return [texify, layout, order, edit, detection, ocr]
I achieved similar results with pickle:
def load_models(device=None, dtype=None, langs=None, pickle_path='models.pkl'):
    """Load all marker models, reusing a pickle snapshot when one exists.

    Args:
        device: Optional device. Applied to unpickled models via ``.to()`` and
            passed to the loaders when building from scratch.
        dtype: Torch dtype for the loaders; required when ``device`` is given
            and the models are built from scratch. Not applied to a snapshot.
        langs: Optional list of languages to prune from the recognition MoE
            model (only used when building from scratch).
        pickle_path: Location of the pickle snapshot to read or create.

    Returns:
        ``[texify, layout, order, edit, detection, ocr]``.

    Raises:
        AssertionError: If the models must be built and ``device`` is given
            without ``dtype``.
    """
    # Local imports: the snippet uses these modules without importing them.
    import os
    import pickle
    import time

    if os.path.exists(pickle_path):
        print(f"Loading models from pickle file: {pickle_path}")
        start_time = time.time()
        # SECURITY: unpickling executes code stored in the file. Only point
        # pickle_path at a snapshot this function wrote itself.
        with open(pickle_path, 'rb') as f:
            model_lst = pickle.load(f)
        print(f"Models loaded from pickle in {time.time() - start_time:.2f} seconds")

        # Move models to the specified device after unpickling.
        # NOTE(review): None entries are skipped in case a loader returns no
        # model (e.g. an optional one) - confirm against marker's loaders.
        if device:
            model_lst = [model if model is None else model.to(device) for model in model_lst]
        return model_lst

    # Only needed when building from scratch, so imported after the cache check.
    from marker.models import setup_recognition_model, setup_texify_model, setup_order_model, setup_layout_model, \
        setup_detection_model
    from marker.postprocessors.editor import load_editing_model

    print("Pickle file not found. Loading models from scratch and creating pickle file.")
    start_time = time.time()

    # Explicit raise (same type and message as the original assert) so the
    # check is not stripped when Python runs with -O.
    if device is not None and dtype is None:
        raise AssertionError("Must provide dtype if device is provided")

    detection = setup_detection_model(device, dtype)
    layout = setup_layout_model(device, dtype)
    order = setup_order_model(device, dtype)
    edit = load_editing_model(device, dtype)
    # The recognition model is always loaded here.
    ocr = setup_recognition_model(langs, device, dtype)
    texify = setup_texify_model(device, dtype)

    model_lst = [texify, layout, order, edit, detection, ocr]
    print(f"Models loaded from scratch in {time.time() - start_time:.2f} seconds")

    print(f"Pickling models to: {pickle_path}")
    pickle_start_time = time.time()
    with open(pickle_path, 'wb') as f:
        pickle.dump(model_lst, f)
    print(f"Models pickled in {time.time() - pickle_start_time:.2f} seconds")

    return model_lst
But using torch.jit is a little more elegant :)
Hi @frankbaele, can you please tell me where I should put this code?
In Python, call this function instead of marker's load_all_models.
load_all_models
Actually, I am new to this tool. I installed it with the pip command mentioned in the README, and I run it with the marker_single command. If possible, could you let me know what can be done in my case?
@frankbaele Or could you guide me on how to run this tool from a Python script, without using the CLI command? Thanks in advance.
Also, here is a revised version of the code from rainny-day:
import os

# For some reason, transformers decided to use .isin for a simple op, which is
# not supported on MPS. Kept ahead of the model imports, as in the original.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

from marker.postprocessors.editor import load_editing_model
from surya.model.detection.model import load_model as load_detection_model, load_processor as load_detection_processor
from texify.model.model import load_model as load_texify_model
from texify.model.processor import load_processor as load_texify_processor
from marker.settings import settings
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.ordering.model import load_model as load_order_model
from surya.model.ordering.processor import load_processor as load_order_processor
import torch

# Directory that holds the torch.save() snapshots of the loaded models.
MODEL_CACHE_DIR = "../model_para"


def get_default_device():
    """Return the current CUDA device as ``"cuda:<index>"``, or ``"cpu"``."""
    if torch.cuda.is_available():
        return f"cuda:{torch.cuda.current_device()}"
    return "cpu"


def _load_cached_model(name, build_model, device):
    """Return a model, reusing its ``torch.save`` snapshot on CUDA devices.

    Args:
        name: Short model name; used for the snapshot file name
            (``<name>_model.pth``) and in log messages.
        build_model: Zero-argument callable that loads the model from its
            original source.
        device: Target device (string or ``torch.device``).

    Returns:
        The loaded model. Non-CUDA devices bypass the snapshot entirely.
    """
    if not str(device).startswith("cuda"):
        model = build_model()
        print(f"Loaded {name} model on {device}.")
        return model

    cache_path = f"{MODEL_CACHE_DIR}/{name}_model.pth"
    try:
        # The snapshot is a fully pickled module, so weights_only must be off
        # (recent PyTorch releases default it to True).
        model = torch.load(cache_path, map_location=device, weights_only=False)
    except Exception as exc:
        # Missing or unreadable snapshot: rebuild and refresh it. Catching
        # Exception (not a bare except) lets Ctrl-C and SystemExit propagate.
        print(f"No usable {name} model snapshot at {cache_path} ({exc!r}); loading from source.")
        model = build_model()
        model.to(device)
        os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
        torch.save(model, cache_path)
        print(f"Loaded {name} model from source and saved a snapshot to {cache_path}.")
    else:
        print(f"Loaded {name} model snapshot from {cache_path} onto {device}.")
    return model


def setup_detection_model(device=None, dtype=None):
    """Load the detection model and attach its processor as ``model.processor``.

    ``dtype`` is only applied when the model is built from source; a snapshot
    keeps whatever dtype it was saved with.
    """
    if device is None:
        device = get_default_device()
    model = _load_cached_model(
        "detection",
        lambda: load_detection_model(device=device, dtype=dtype),
        device,
    )
    model.processor = load_detection_processor()
    return model


def setup_recognition_model(langs, device=None, dtype=None):
    """Load the recognition model for ``langs`` and attach its processor.

    ``langs`` and ``dtype`` are only applied when the model is built from
    source; a snapshot is returned as it was saved.
    """
    if device is None:
        device = get_default_device()
    model = _load_cached_model(
        "recognition",
        lambda: load_recognition_model(langs=langs, device=device, dtype=dtype),
        device,
    )
    model.processor = load_recognition_processor()
    return model


def setup_texify_model(device=None, dtype=None):
    """Load the texify model (``settings.TEXIFY_MODEL_NAME``) and its processor."""
    if device is None:
        device = get_default_device()
    model = _load_cached_model(
        "texify",
        lambda: load_texify_model(checkpoint=settings.TEXIFY_MODEL_NAME, device=device, dtype=dtype),
        device,
    )
    model.processor = load_texify_processor()
    return model


def setup_layout_model(device=None, dtype=None):
    """Load the layout model (``settings.LAYOUT_MODEL_CHECKPOINT``) and its processor.

    The layout model is loaded through the detection loader with the layout
    checkpoint, exactly as in the original snippet.
    """
    if device is None:
        device = get_default_device()
    model = _load_cached_model(
        "layout",
        lambda: load_detection_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT, device=device, dtype=dtype),
        device,
    )
    model.processor = load_detection_processor(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
    return model


def setup_order_model(device=None, dtype=None):
    """Load the reading-order model and attach its processor."""
    if device is None:
        device = get_default_device()
    model = _load_cached_model(
        "order",
        lambda: load_order_model(device=device, dtype=dtype),
        device,
    )
    model.processor = load_order_processor()
    return model


def load_all_models(langs=None, device=None, dtype=None, force_load_ocr=False):
    """Load every model and return them in a fixed order.

    Args:
        langs: Optional list of languages to prune from the recognition MoE
            model.
        device: Optional target device; requires ``dtype`` when given.
        dtype: Torch dtype passed to every loader.
        force_load_ocr: Accepted for compatibility; not used by this function.

    Returns:
        ``[texify, layout, order, edit, detection, ocr]``.

    Raises:
        AssertionError: If ``device`` is given without ``dtype``.
    """
    # Explicit raise (same type and message as the original assert) so the
    # check is not stripped when Python runs with -O.
    if device is not None and dtype is None:
        raise AssertionError("Must provide dtype if device is provided")

    detection = setup_detection_model(device, dtype)
    layout = setup_layout_model(device, dtype)
    order = setup_order_model(device, dtype)
    edit = load_editing_model(device, dtype)
    # The recognition model is always loaded here.
    ocr = setup_recognition_model(langs, device, dtype)
    texify = setup_texify_model(device, dtype)
    return [texify, layout, order, edit, detection, ocr]
This works for me with a few small changes.
I installed and deployed marker-pdf locally. It produces output successfully in GPU + CUDA mode, but loading the models is extremely slow (load_all_models() from the source code). Why? Is this normal, or is there a setting needed to speed it up?
Here is the actual tested log: python main.py DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443 Loaded detection model vikp/surya_det3 on device cuda with dtype torch.float16 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (2): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (3): huggingface.co:443 Loaded detection model vikp/surya_layout3 on device cuda with dtype torch.float16 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (4): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (5): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (6): huggingface.co:443 Loaded reading order model vikp/surya_order on device cuda with dtype torch.float16 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (7): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (8): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (9): huggingface.co:443 Loaded recognition model vikp/surya_rec on device cuda with dtype torch.float16 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (10): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (11): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (12): huggingface.co:443 Loaded texify model to cuda with torch.float16 dtype DEBUG:urllib3.connectionpool:Starting new HTTPS connection (13): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (14): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (15): huggingface.co:443 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (16): huggingface.co:443 INFO:Total elapsed time: 3.1 minutes Detecting bboxes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:05<00:00, 1.30it/s] Recognizing Text: 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00, 1.15it/s] Detecting bboxes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.64s/it] Finding reading order: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:04<00:00, 1.10it/s] INFO:Total elapsed time: 37.7 seconds