shuzhao-li-lab / JMS

Json's Metabolite Services
MIT License
1 stars 1 forks source link

to update io.read_table_to_peaks #20

Closed shuzhao-li closed 1 year ago

shuzhao-li commented 1 year ago
def read_table_to_peaks(infile, 
                        has_header=True, mz_col=1, rtime_col=2, intensity=(11,21), feature_id=None,
                        full_extract=True, max_col=21,
                        delimiter='\t'):
    '''
    Read a delimited text feature table and return a list of peaks, e.g. [ {
        'id_number': 'F1_133.097@654.0',
        'id': 'F1_133.097@654.0',
        'mz': 133.0970,
        'rtime': 654.0,
        'apex': 654.0,
        'representative_intensity': 14388.0,
        }, ... ]

    infile: path of the feature table.
    has_header: if True, the first line is treated as column names and is not parsed as a peak.
    mz_col, rtime_col: 0-based column indices of m/z and retention time.
    intensity: (start, stop) column slice; representative_intensity is the mean of
        these columns (0 if the slice is empty).
    feature_id: 0-based column index of the feature id;
        if None, create id for each peak/feature from a running counter, m/z and retention time.
    full_extract: to keep all fields in output as strings, only if has_header.
        Computed fields ('id', 'mz', 'rtime', ...) override same-named columns.
    max_col: only the first max_col fields of each line are considered.
    delimiter: field separator.

    Rows with too few fields to hold both mz_col and rtime_col (including blank lines)
    are skipped. An empty file returns an empty list.
    Raises ValueError if an m/z, retention time or intensity field is not numeric.
    '''
    def _make_id(ii, mz, rt):
        return 'F' + str(ii) + '_' + str(round(mz, 6)) + '@' + str(round(rt, 2))

    # Context manager so the file handle is closed even if reading fails.
    with open(infile) as f:
        lines = f.readlines()

    header = []
    if has_header and lines:
        header = lines[0].rstrip().split(delimiter)[:max_col]
        lines = lines[1:]

    # Both indices must be valid, hence +1 (a row of length N has no index N).
    min_fields = max(mz_col, rtime_col) + 1

    list_peaks = []
    ii = 0
    for line in lines:
        # not rstrip, so trailing EOL will be carried forward in the last field
        a = line.split(delimiter)[:max_col]
        if len(a) < min_fields:
            continue

        mz, rt = float(a[mz_col]), float(a[rtime_col])
        intensities = [float(x) for x in a[intensity[0]: intensity[1]]]
        if feature_id is not None:
            fid = a[feature_id].strip()
        else:
            ii += 1
            fid = _make_id(ii, mz, rt)
        peak = {'id_number': fid, 'id': fid,
                'mz': mz, 'rtime': rt, 'apex': rt,
                'representative_intensity': sum(intensities) / max(1, len(intensities))}
        if has_header and full_extract:
            # raw string fields first, then parsed values overwrite redundant fields
            peak2 = dict(zip(header, a))
            peak2.update(peak)
            peak = peak2

        list_peaks.append(peak)

    return list_peaks
jmmitc06 commented 1 year ago

This change has been added. Closing this issue.