Closed hoelzer closed 4 years ago
#!/usr/bin/env python3
"""Group cluster IDs from a tab-separated absence/presence table by strain.

The script reads a comma-separated strain-id file (``prokka_id,strain``) and a
tab-separated table whose header line starts with ``Cluster_ID``. For every
non-``NA`` cell from the fourth column onwards, the row's first field (the
cluster ID) is appended to the list of the strain that the cell belongs to.
The result is printed, written to one combined file, and written to one file
per strain.
"""
import os

input_file = '/data/prostlocal2/projects/chlamydia_comparative_study/flamingo_ribap/ribap/ribap_gene_absence_presence.csv'
input_strain_ids = '/data/prostlocal2/projects/chlamydia_comparative_study/flamingo_ribap/ribap/strain_ids.txt'
output_file = '/data/prostlocal2/projects/chlamydia_comparative_study/flamingo_ribap/ribap/gene_subsets.txt'
subset_dir = '/data/prostlocal2/projects/chlamydia_comparative_study/flamingo_ribap/ribap/subsets'


def read_strain_ids(path):
    """Return a dict mapping the first comma-separated field to the second.

    Blank lines are skipped. A non-blank line without a comma still raises
    ``IndexError`` so that a malformed file is not silently accepted.
    """
    strain_dict = {}
    with open(path) as handle:
        for raw in handle:
            record = raw.strip()
            if not record:
                # A trailing or stray empty line is not a mapping.
                continue
            fields = record.split(',')
            strain_dict[fields[0]] = fields[1]
    return strain_dict


def collect_clusters(path, strain_dict):
    """Return ``{strain: [cluster_id, ...]}`` parsed from the table at *path*.

    The header line (starting with ``Cluster_ID``) creates one empty list per
    column from the fourth onwards, keyed by ``<column name>_RENAMED``. Every
    other line contributes its first field to the list of each strain that has
    a non-``NA`` cell in that line. The strain is looked up in *strain_dict*
    via the part of the cell before the first underscore.

    Raises:
        KeyError: if a cell's prefix is missing from *strain_dict*, or the
            mapped strain name was not created from the header.
    """
    my_dict = {}
    with open(path) as table:
        for raw in table:
            line = raw.strip()
            fields = line.split('\t')  # split once, reuse for id and cells
            if line.startswith("Cluster_ID"):
                for column in fields[3:]:
                    # NOTE(review): keys get a '_RENAMED' suffix, so the
                    # strain names in the id file presumably carry it too.
                    my_dict[column + '_RENAMED'] = []
                continue
            cluster_id = fields[0]
            for cell in fields[3:]:
                if cell == "NA":
                    continue
                prokka_id = cell.split('_')[0]
                my_dict[strain_dict[prokka_id]].append(cluster_id)
    return my_dict


def write_combined(path, my_dict):
    """Write each strain name followed by its cluster IDs, one per line."""
    with open(path, 'w') as out:
        for key, value in my_dict.items():
            out.write(key + '\n')
            for entry in value:
                out.write(entry + '\n')


def write_subsets(directory, my_dict):
    """Write one ``<strain>_subset.txt`` file per strain into *directory*.

    The directory is created if it does not exist yet.
    """
    os.makedirs(directory, exist_ok=True)
    for key, value in my_dict.items():
        with open(f'{directory}/{key}_subset.txt', 'w') as out:
            for entry in value:
                out.write(entry + '\n')


def main():
    """Run the whole conversion on the module-level paths."""
    strain_dict = read_strain_ids(input_strain_ids)
    my_dict = collect_clusters(input_file, strain_dict)
    for i in my_dict:
        print(i, my_dict[i])
    write_combined(output_file, my_dict)
    # write to multiple output files
    write_subsets(subset_dir, my_dict)


if __name__ == "__main__":
    main()
First, parse the output of the pipeline according to /data/prostlocal2/projects/chlamydia_comparative_study/flamingo_ribap/getsubsets_ribap.py,
then add the UpSetR code.