VikParuchuri / surya

OCR, layout analysis, reading order, table recognition in 90+ languages
https://www.datalab.to
GNU General Public License v3.0
14.29k stars 889 forks source link

Table recognition: text is null #201

Open zzk2021 opened 1 month ago

zzk2021 commented 1 month ago

{"0a9c59d3605e65f75e3374cb4767762ea021a1ac894308f08b2d0cf4fa9780f8": [{"cells": [{"bbox": [94.0, 0.0, 226.0, 9.0], "row_id": 0, "col_id": 0, "text": null}, {"bbox": [533.0, 0.0, 856.0, 13.0], "row_id": 0, "col_id": 1, "text": null}, {"bbox": [1017.0, 40.0, 1183.0, 68.0], "row_id": 1, "col_id": 3, "text": null}, {"bbox": [563.0, 41.0, 788.0, 69.0], "row_id": 1, "col_id": 1, "text": null}, {"bbox": [16.0, 94.0, 82.0, 123.0], "row_id": 2, "col_id": 0, "text": null}, {"bbox": [1164.0, 95.0, 1264.0, 124.0], "row_id": 2, "col_id": 3, "text": null}, {"bbox": [866.0, 96.0, 966.0, 123.0], "row_id": 2, "col_id": 2, "text": null}, {"bbox": [17.0, 150.0, 244.0, 179.0], "row_id": 3, "col_id": 0, "text": null}, {"bbox": [1102.0, 150.0, 1328.0, 179.0], "row_id": 3, "col_id": 3, "text": null}, {"bbox": [852.0, 152.0, 981.0, 179.0], "row_id": 3, "col_id": 2, "text": null}, {"bbox": [17.0, 207.0, 153.0, 236.0], "row_id": 4, "col_id": 0, "text": null}, {"bbox": [868.0, 207.0, 969.0, 235.0], "row_id": 4, "col_id": 2, "text": null}, {"bbox": [1165.0, 207.0, 1267.0, 235.0], "row_id": 4, "col_id": 3, "text": null}, {"bbox": [19.0, 263.0, 212.0, 290.0], "row_id": 5, "col_id": 0, "text": null}, {"bbox": [890.0, 264.0, 945.0, 289.0], "row_id": 5, "col_id": 2, "text": null}, {"bbox": [1188.0, 264.0, 1241.0, 289.0], "row_id": 5, "col_id": 3, "text": null}, {"bbox": [17.0, 318.0, 189.0, 344.0], "row_id": 6, "col_id": 0, "text": null}, {"bbox": [890.0, 320.0, 944.0, 343.0], "row_id": 6, "col_id": 2, "text": null}, {"bbox": [1188.0, 320.0, 1240.0, 343.0], "row_id": 6, "col_id": 3, "text": null}, {"bbox": [17.0, 375.0, 251.0, 399.0], "row_id": 7, "col_id": 0, "text": null}, {"bbox": [1189.0, 375.0, 1240.0, 398.0], "row_id": 7, "col_id": 3, "text": null}, {"bbox": [890.0, 376.0, 944.0, 399.0], "row_id": 7, "col_id": 2, "text": null}, {"bbox": [16.0, 429.0, 253.0, 457.0], "row_id": 8, "col_id": 0, "text": null}, {"bbox": [898.0, 431.0, 937.0, 457.0], "row_id": 8, "col_id": 2, "text": null}, 
{"bbox": [1196.0, 432.0, 1234.0, 455.0], "row_id": 8, "col_id": 3, "text": null}, {"bbox": [17.0, 484.0, 313.0, 512.0], "row_id": 9, "col_id": 0, "text": null}, {"bbox": [877.0, 486.0, 959.0, 511.0], "row_id": 9, "col_id": 2, "text": null}, {"bbox": [1175.0, 486.0, 1258.0, 510.0], "row_id": 9, "col_id": 3, "text": null}, {"bbox": [16.0, 540.0, 252.0, 567.0], "row_id": 10, "col_id": 0, "text": null}, {"bbox": [876.0, 541.0, 959.0, 567.0], "row_id": 10, "col_id": 2, "text": null}, {"bbox": [1174.0, 542.0, 1257.0, 567.0], "row_id": 10, "col_id": 3, "text": null}, {"bbox": [16.0, 595.0, 528.0, 623.0], "row_id": 11, "col_id": 0, "text": null}, {"bbox": [870.0, 597.0, 966.0, 622.0], "row_id": 11, "col_id": 2, "text": null}, {"bbox": [1167.0, 597.0, 1264.0, 622.0], "row_id": 11, "col_id": 3, "text": null}, {"bbox": [16.0, 650.0, 543.0, 680.0], "row_id": 12, "col_id": 0, "text": null}, {"bbox": [1167.0, 652.0, 1265.0, 677.0], "row_id": 12, "col_id": 3, "text": null}, {"bbox": [869.0, 653.0, 968.0, 677.0], "row_id": 12, "col_id": 2, "text": null}, {"bbox": [16.0, 706.0, 543.0, 734.0], "row_id": 13, "col_id": 0, "text": null}, {"bbox": [870.0, 707.0, 966.0, 733.0], "row_id": 13, "col_id": 2, "text": null}, {"bbox": [1167.0, 708.0, 1261.0, 731.0], "row_id": 13, "col_id": 3, "text": null}, {"bbox": [16.0, 764.0, 412.0, 789.0], "row_id": 14, "col_id": 0, "text": null}, {"bbox": [890.0, 764.0, 945.0, 789.0], "row_id": 14, "col_id": 2, "text": null}, {"bbox": [1188.0, 764.0, 1241.0, 789.0], "row_id": 14, "col_id": 3, "text": null}, {"bbox": [17.0, 817.0, 293.0, 845.0], "row_id": 15, "col_id": 0, "text": null}, {"bbox": [882.0, 819.0, 952.0, 844.0], "row_id": 15, "col_id": 2, "text": null}, {"bbox": [1180.0, 819.0, 1248.0, 843.0], "row_id": 15, "col_id": 3, "text": null}, {"bbox": [905.0, 873.0, 927.0, 898.0], "row_id": 16, "col_id": 2, "text": null}, {"bbox": [1203.0, 873.0, 1226.0, 898.0], "row_id": 16, "col_id": 3, "text": null}, {"bbox": [16.0, 874.0, 212.0, 899.0], "row_id": 
16, "col_id": 0, "text": null}, {"bbox": [18.0, 928.0, 253.0, 955.0], "row_id": 17, "col_id": 0, "text": null}, {"bbox": [858.0, 930.0, 974.0, 955.0], "row_id": 17, "col_id": 2, "text": null}, {"bbox": [1156.0, 930.0, 1273.0, 955.0], "row_id": 17, "col_id": 3, "text": null}, {"bbox": [17.0, 984.0, 308.0, 1011.0], "row_id": 18, "col_id": 0, "text": null}, {"bbox": [875.0, 985.0, 960.0, 1010.0], "row_id": 18, "col_id": 2, "text": null}, {"bbox": [1170.0, 985.0, 1258.0, 1010.0], "row_id": 18, "col_id": 3, "text": null}], "rows": [{"bbox": [85.5712890625, 0.0, 856.3974609375, 12.03515625], "row_id": 0, "col_id": null, "text": null}, {"bbox": [559.2939453125, 43.1259765625, 1176.7763671875, 71.2080078125], "row_id": 1, "col_id": null, "text": null}, {"bbox": [15.060546875, 93.2724609375, 1263.716796875, 123.3603515625], "row_id": 2, "col_id": null, "text": null}, {"bbox": [15.060546875, 148.93505859375, 1326.697265625, 178.02001953125], "row_id": 3, "col_id": null, "text": null}, {"bbox": [15.7451171875, 206.10205078125, 1265.7705078125, 235.18701171875], "row_id": 4, "col_id": null, "text": null}, {"bbox": [17.1142578125, 262.26611328125, 1239.7568359375, 289.34521484375], "row_id": 5, "col_id": null, "text": null}, {"bbox": [17.1142578125, 317.42724609375, 1239.7568359375, 342.50048828125], "row_id": 6, "col_id": null, "text": null}, {"bbox": [17.1142578125, 373.08984375, 1239.7568359375, 397.16015625], "row_id": 7, "col_id": null, "text": null}, {"bbox": [13.0068359375, 428.2509765625, 1232.9111328125, 456.3330078125], "row_id": 8, "col_id": null, "text": null}, {"bbox": [15.7451171875, 483.412109375, 1257.5556640625, 511.494140625], "row_id": 9, "col_id": null, "text": null}, {"bbox": [14.3759765625, 539.07470703125, 1256.1865234375, 566.15380859375], "row_id": 10, "col_id": null, "text": null}, {"bbox": [13.69140625, 594.7373046875, 1262.34765625, 622.8193359375], "row_id": 11, "col_id": null, "text": null}, {"bbox": [13.69140625, 649.8984375, 1262.34765625, 
679.986328125], "row_id": 12, "col_id": null, "text": null}, {"bbox": [13.69140625, 705.0595703125, 1259.609375, 733.1416015625], "row_id": 13, "col_id": null, "text": null}, {"bbox": [13.69140625, 762.72802734375, 1240.44140625, 787.80126953125], "row_id": 14, "col_id": null, "text": null}, {"bbox": [15.060546875, 816.384765625, 1247.287109375, 844.466796875], "row_id": 15, "col_id": null, "text": null}, {"bbox": [14.3759765625, 870.04150390625, 1226.0654296875, 899.12646484375], "row_id": 16, "col_id": null, "text": null}, {"bbox": [17.798828125, 927.20849609375, 1271.931640625, 954.28759765625], "row_id": 17, "col_id": null, "text": null}, {"bbox": [15.7451171875, 983.37255859375, 1257.5556640625, 1010.45166015625], "row_id": 18, "col_id": null, "text": null}], "cols": [{"bbox": [3.4228515625, -0.50146484375, 541.4951171875, 1009.44873046875], "row_id": null, "col_id": 0, "text": null}, {"bbox": [520.9580078125, -0.50146484375, 864.6123046875, 1009.44873046875], "row_id": null, "col_id": 1, "text": null}, {"bbox": [848.8671875, 39.61572265625, 980.3046875, 1007.44287109375], "row_id": null, "col_id": 2, "text": null}, {"bbox": [1154.8701171875, 96.28125, 1271.2470703125, 1006.94140625], "row_id": null, "col_id": 3, "text": null}], "image_bbox": [0.0, 0.0, 1402.0, 1027.0], "page": 1, "table_idx": 0}]}

wenerme commented 1 month ago

Use tabled instead: https://github.com/VikParuchuri/tabled