Closed kyleclo closed 1 year ago
Pretty much an exact port from mmda. Testing works fine on the PDF fixtures.
mmda
import json
import os
import pathlib
import unittest

from papermage.magelib import Document, Entity, Metadata, Span
from papermage.recipes import CoreRecipe

# Fixture PDFs are resolved relative to this script's own directory.
fixture_path = pathlib.Path(__file__).parent / "tests/fixtures"

# A single recipe instance is reused for every fixture below.
recipe = CoreRecipe()

# Run the recipe over each fixture in order. `doc` is rebound on every
# iteration, so after the loop it holds the result for the last fixture only.
for _pdf_name in (
    "1903.10676.pdf",
    "2304.02623v1.pdf",
    "2020.acl-main.447.pdf",
    "4be952924cd565488b4a239dc6549095029ee578.pdf",
    "2023.eacl-main.121.pdf",
):
    doc = recipe.from_path(pdfpath=fixture_path / _pdf_name)
Pretty much an exact port from `mmda`. Testing works fine on the PDF fixtures.