def read_table_to_peaks(infile,
                has_header=True, mz_col=1, rtime_col=2, intensity=(11,21), feature_id=None,
                full_extract=True, max_col=21,
                delimiter='\t'):
    '''
    Read a delimited text feature table and return a list of peak dicts, e.g.
        [ {
            'id_number': 'F1_133.097@654.0',
            'id': 'F1_133.097@654.0',
            'mz': 133.0970,
            'rtime': 654.0,
            'apex': 654.0,
            'representative_intensity': 14388.0,
        }, ... ]

    infile: path of the table to read.
    has_header: if True, the first line is taken as column names and is not parsed as data.
    mz_col, rtime_col: column indices of m/z and retention time; both are parsed as float.
    intensity: (start, end) slice bounds of the intensity columns; their mean becomes
        'representative_intensity' (0 when the slice is empty).
    feature_id: column index of the feature id; if None, an id is generated for each
        peak/feature from a running counter, the m/z and the retention time.
    full_extract: keep all fields in the output as strings, keyed by the header names;
        only applies if has_header. The computed keys listed above take precedence over
        header columns of the same name.
    max_col: only the first max_col columns of every line are considered.
    delimiter: field separator.

    Lines with too few fields to hold the mz, rtime (and feature_id) columns are skipped.
    Data lines are not right-stripped, so the last kept field of a line may carry the
    trailing end-of-line character.
    Raises ValueError if an m/z, retention time or intensity field is not a number.
    '''
    def _make_id(ii, mz, rt):
        return 'F' + str(ii) + '_' + str(round(mz, 6)) + '@' + str(round(rt, 2))

    # Context manager guarantees the file handle is closed, even on error.
    with open(infile) as table:
        lines = table.readlines()

    header = []
    if has_header:
        if not lines:
            # Empty file: there is no header and no data to parse.
            return []
        header = lines[0].rstrip().split(delimiter)[:max_col]
        lines = lines[1:]

    # Minimum number of fields a line needs so that every indexed column exists.
    # A non-negative index i needs i + 1 fields; a negative index -k needs k fields.
    required_cols = [mz_col, rtime_col]
    if feature_id is not None:
        required_cols.append(feature_id)
    min_fields = max(col + 1 if col >= 0 else -col for col in required_cols)

    list_peaks = []
    ii = 0
    for line in lines:
        # not rstrip, so trailing EOL will be carried forward
        fields = line.split(delimiter)[:max_col]
        if len(fields) < min_fields:
            # Blank or truncated line: skip instead of failing on a missing column.
            continue

        mz, rt = float(fields[mz_col]), float(fields[rtime_col])
        intensities = [float(x) for x in fields[intensity[0]: intensity[1]]]
        if feature_id is not None:
            fid = fields[feature_id].strip()
        else:
            ii += 1
            fid = _make_id(ii, mz, rt)

        peak = {'id_number': fid, 'id': fid,
                'mz': mz, 'rtime': rt, 'apex': rt,
                'representative_intensity': sum(intensities) / max(1, len(intensities))}
        if has_header and full_extract:
            # Start from the raw string fields, then let the parsed values override
            # any header column that shares a key name.
            row = dict(zip(header, fields))
            row.update(peak)
            peak = row
        list_peaks.append(peak)

    return list_peaks