Closed pickpingk closed 5 months ago
import os

try:
    from tqdm import tqdm
except ImportError:  # tqdm only drives the progress bar; indexing works without it
    tqdm = None


def index_file_generate(file_path, index_file, bin_size=1000):
    """Write a bin-based byte-offset index for a tab-separated database file.

    Python port of the original Perl indexing step. Every data line must
    start with ``<chrom>\\t<start>``; lines beginning with ``#`` and blank
    lines are skipped but still count towards the running offset.

    The index file starts with a header line ``#BIN\\t<bin_size>\\t<file_size>``
    followed by one line per (chrom, bin) pair::

        <chrom>\\t<bin_start>\\t<min_offset>\\t<max_offset>

    where ``bin_start = start - start % bin_size``, ``min_offset`` is the byte
    offset of the first line that falls in the bin and ``max_offset`` is the
    byte offset just past the last such line. Entries are written in the
    lexicographic order of the ``"<chrom>\\t<bin_start>"`` string.

    Args:
        file_path: Path of the database file to index.
        index_file: Path of the index file to create (overwritten if present).
        bin_size: Width of each position bin; defaults to 1000.

    Raises:
        ValueError: If a data line has fewer than two tab-separated fields or
            its second field is not an integer.
        OSError: If either file cannot be opened.
    """
    file_size = os.path.getsize(file_path)
    regions = {}
    offset = 0
    pbar = tqdm(total=file_size) if tqdm is not None else None
    try:
        # Read as bytes: offsets must be byte positions usable with seek(),
        # and len() of a decoded text line counts characters, which differs
        # from the byte count for non-ASCII data or translated "\r\n" endings.
        with open(file_path, 'rb') as rf, open(index_file, 'w') as wf:
            wf.write(f"#BIN\t{bin_size}\t{file_size}\n")
            for raw in rf:
                line_start = offset
                offset += len(raw)
                if pbar is not None:
                    # Advance for every line (comments included) so the bar
                    # can actually reach file_size.
                    pbar.update(len(raw))

                record = raw.strip()
                if not record or raw.startswith(b"#"):
                    continue

                chrom, start = record.split(b'\t')[:2]
                start = int(start.decode())
                curbin = start - (start % bin_size)
                key = f"{chrom.decode()}\t{curbin}"
                if key in regions:
                    regions[key]["max"] = offset
                else:
                    regions[key] = {"min": line_start, "max": offset}

            for key in sorted(regions):
                span = regions[key]
                wf.write(f"{key}\t{span['min']}\t{span['max']}\n")
    finally:
        if pbar is not None:
            pbar.close()
    print('generate index successfully')
Maybe you need to change something in the code?
Maybe you need to change something in the code?