Academic-Hammer / SciTSR

Table structure recognition dataset of the paper: Complicated Table Structure Recognition
https://arxiv.org/pdf/1908.04729.pdf
MIT License
351 stars 58 forks source link

测试集中没有rel和json文件夹 #43

Open MathamPollard opened 1 year ago

MathamPollard commented 1 year ago

测试集中没有rel和json文件夹,可是以下代码会检查rel和json文件夹是否存在,不存在则报错,要怎么解决? def load_dataset(self, dataset_dir, with_cells, trim=None, debug=False, exts=None): dataset, cells = [], [] if exts is None: exts = ['chunk','rel'] if with_cells: exts.append('json') sub_paths = self.get_sub_paths(dataset_dir, exts, trim=trim) for i, paths in enumerate(sub_paths): if debug and i > 50: break chunk_path = paths[0] relation_path = paths[1]

        chunks = self.load_chunks(chunk_path)
        # TODO handle big tables
        #if len(chunks) > 100 or len(chunks) == 0: continue
        relations = self.load_relations(relation_path)
        #new_chunks, new_rels = self.clean_chunk_rel(chunks, relations)
        #chunks, relations = new_chunks, new_rels

        if with_cells:
            cell_path = paths[2]
            with open(cell_path) as f:
                cell_json = json.load(f)
        else:
            cell_json = None

        dataset.append(Data(
            chunks=chunks,
            relations=relations,
            cells=cell_json,
            path=chunk_path,
        ))
    return dataset

def get_sub_paths(self, root_dir: str, sub_names: List[str], trim=None):
    """Check the existence of directories and collect per-sample file paths.

    Every name in ``sub_names`` must be a sub-directory of ``root_dir``.
    The files listed in the first sub-directory define the samples; for each
    one, the file ``<root_dir>/<ext>/<stem>.<ext>`` must exist for every
    remaining name in ``sub_names``.

    Args:
        root_dir: Dataset root directory.
        sub_names: Sub-directory names, which double as file extensions for
            all entries after the first.
        trim: If truthy, only the first ``trim`` entries of the directory
            listing are used.

    Returns:
        A list with one entry per sample; each entry is a list of paths in
        the same order as ``sub_names``.

    Raises:
        AssertionError: If ``root_dir`` or a sub-directory is not a
            directory, or a sample's companion file is missing. Raised
            explicitly so the check also runs under ``python -O``.
    """
    if not os.path.isdir(root_dir):
        raise AssertionError('"%s" is not dir.' % root_dir)

    sub_dirs = []
    for sub_name in sub_names:
        sub_dir = os.path.join(root_dir, sub_name)
        if not os.path.isdir(sub_dir):
            raise AssertionError('"%s" is not dir.' % sub_dir)
        sub_dirs.append(sub_dir)

    file_names = os.listdir(sub_dirs[0])
    if trim:
        file_names = file_names[:trim]

    paths = []
    for file_name in file_names:
        stem = os.path.splitext(file_name)[0]
        sub_paths = [os.path.join(sub_dirs[0], file_name)]
        # Reuse the directories validated above instead of re-joining them.
        for sub_dir, ext in zip(sub_dirs[1:], sub_names[1:]):
            sub_path = os.path.join(sub_dir, stem + '.' + ext)
            if not os.path.exists(sub_path):
                # Name the missing file so the user can see what to generate.
                raise AssertionError('"%s" does not exist.' % sub_path)
            sub_paths.append(sub_path)
        paths.append(sub_paths)

    return paths
Un1rkee commented 1 year ago

遇到同样的问题,希望能解答一下 @CZWin32768 ,万分感谢!

MathamPollard commented 1 year ago

遇到同样的问题,希望能解答一下 @CZWin32768 ,万分感谢!

我解决了,你首先把test下的Structure改名为json,然后运行./SciTSR/scitsr/data/rel_gen.py就可以根据json产生rel了,记得在rel_gen.py里面改一下路径。train里的Structure最好也改名为json