Villen-Lab / pyAscore

A python package for fast post translational modification localization, powered by Cython.
https://pyascore.readthedocs.io/
MIT License
18 stars 5 forks source link

mass cannot be matched #52

Open YoujiaMa opened 1 month ago

YoujiaMa commented 1 month ago

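Hi pyAscore team. My mass spectrometry data contains two different modifications on K (Acetyl, +42.010565, and Carbamyl, +43.005814). When I parse the pepXML file with the code below, the output for the following example spectrum seems to be wrong: the two K modifications are reported with the same mass (~42.01), even though the file assigns Carbamyl to position 1 and Acetyl to position 2.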

## input code
pyascore.IdentificationParser(psm_file, "pepXML")

### pepXML information
<aminoacid_modification aminoacid="K" massdiff="42.010565" mass="170.105528446600005" variable="Y" binary="N" description="Acetyl (K)"/>
<aminoacid_modification aminoacid="C" massdiff="57.021464000000002" mass="160.030648985200003" variable="Y" binary="N" description="Carbamidomethyl (C)"/>
<aminoacid_modification aminoacid="K" massdiff="43.005814000000001" mass="171.100777414700019" variable="Y" binary="N" description="Carbamyl (K)"/>
....
<spectrum_query spectrum="01CPTAC_UCEC_A_PNNL_20180621_B1S1_f03.3256.3256.2" start_scan="3256" end_scan="3256" precursor_neutral_mass="1155.692700233000096" assumed_charge="2" index="3255" retention_time_sec="638.6" >
<search_result>
    <search_hit hit_rank="1" peptide="KKSLNPR" peptide_prev_aa="R" peptide_next_aa="R" protein="DECOY_sp|Q5TCZ1|SPD2A_HUMAN" num_tot_proteins="1" num_matched_ions="0" tot_num_ions="0" calc_neutral_pep_mass="1155.692700233000096" massdiff="0.0" num_tol_term="1" num_missed_cleavages="0" is_rejected="0" protein_descr="Protein No. 1">
        <modification_info modified_peptide="n[230]K[171]K[170]SLNPR" mod_nterm_mass="230.170757031900024">
            <mod_aminoacid_mass position="1" mass="171.100777414700019"/>
            <mod_aminoacid_mass position="2" mass="170.105528446599976"/>
        </modification_info>
        <search_score name="Posterior Error Probability" value="0.711939"/>
        <search_score name="Posterior Error Probability" value="0.711939"/>
        <analysis_result analysis="peptideprophet">
            <peptideprophet_result probability="0.288061" all_ntt_prob="(0.0000,0.0000,0.288061)"/>
        </analysis_result>
    </search_hit>
</search_result>
</spectrum_query>
...

## output data
scan    charge_state    score   peptide mod_positions   mod_masses
....
3256    2       KKSLNPR [0 0 2] [230.17076111  42.010565    42.01056677]
....
YoujiaMa commented 1 month ago

I also have another question. I submitted a peptide with two K residues that carry different modifications (42 and 229), and pyAscore seems to combine the two modifications into one (271).

mod_mass = 42.010565
ascore = pyascore.PyAscore(bin_size=100., n_top=10,
                  mod_group="K",
                  mod_mass=mod_mass,
                  mz_error=.05)

psm = {'scan': 3287, 
 'charge_state': 3, 
 'score': None, 
 'peptide': 'DLNKKAPR', 
 'mod_positions': np.array([4, 5]), 
 'mod_masses': np.array([ 42.01056677, 229.16294104])}

spectrum = {'ms_level': 2,
 'precursor_mz': 404.914245605469,
 'precursor_charge': 3,
 'mz_values': np.array([110.07141876, 112.08733368, 113.07111359, 115.03938293,
        115.08678436, 115.33179474, 116.07073975, 116.2845459 ,
        120.08106995, 122.01766205, 124.06364441, 126.09165192,
        127.09474945, 128.77972412, 129.10255432, 129.11326599,
        133.0606842 , 136.07579041, 140.0818634 , 143.11802673,
        143.49917603, 147.11273193, 149.02334595, 156.07667542,
        157.11747742, 158.09274292, 158.96482849, 164.92024231,
        167.54089355, 174.47944641, 175.11494446, 175.11935425,
        178.28999329, 185.65473938, 188.50109863, 190.36447144,
        193.09721375, 199.6519928 , 207.03370667, 212.13938904,
        212.14729309, 213.52363586, 217.30023193, 222.46781921,
        225.0438385 , 229.17080688, 233.06929016, 234.13340759,
        237.1348877 , 242.17889404, 243.16801453, 249.09786987,
        251.08027649, 255.14538574, 256.12902832, 256.14941406,
        257.16119385, 267.05197144, 268.99401855, 271.67874146,
        272.17147827, 275.07952881, 279.69470215, 290.84637451,
        293.69259644, 301.02029419, 301.14077759, 306.85220337,
        308.20550537, 317.86584473, 319.03079224, 319.0680542 ,
        326.1817627 , 336.71582031, 343.2093811 , 348.80892944,
        350.71310425, 358.72515869, 359.22677612, 359.72824097,
        369.92462158, 371.22479248, 371.90426636, 372.56295776,
        372.7326355 , 384.90420532, 385.23599243, 388.90756226,
        390.23800659, 390.5730896 , 390.90722656, 393.49502563,
        394.91055298, 398.9105835 , 399.24212646, 399.90655518,
        400.11428833, 401.25558472, 404.57958984, 404.72085571,
        404.91394043, 405.07366943, 405.13671875, 405.24798584,
        405.58248901, 421.76867676, 422.27084351, 422.90350342,
        422.98986816, 435.27349854, 435.76641846, 436.26812744,
        441.76608276, 442.26879883, 449.29064941, 456.78729248,
        457.2875061 , 457.78939819, 471.28503418, 471.78613281,
        480.29025269, 480.78964233, 505.81430054, 506.31677246,
        506.80203247, 513.31384277, 519.31976318, 519.81164551,
        526.79168701, 528.32501221, 528.81671143, 529.31774902,
        570.33709717, 627.3583374 , 658.37213135, 717.44476318,
        728.40631104, 815.43908691, 870.52423096, 870.70910645,
        900.51550293, 913.57122803, 941.56256104]),
 'intensity_values': np.array([2.67419995e+03, 6.78697412e+03, 4.57884888e+02, 5.24235413e+02,
        2.03178333e+03, 4.04506165e+02, 1.30953601e+03, 3.64795288e+02,
        6.37048584e+02, 3.72499481e+02, 4.22867035e+02, 1.32507920e+04,
        8.11848389e+02, 4.03839447e+02, 4.05803638e+03, 8.41870483e+02,
        7.89050781e+02, 1.17650513e+03, 3.38443945e+03, 5.32882935e+02,
        5.04560425e+02, 6.29092163e+02, 7.90641541e+02, 6.25357910e+02,
        5.16468994e+02, 3.97850415e+03, 5.03887604e+02, 2.51894336e+03,
        5.24506226e+02, 4.81720520e+02, 5.10110809e+02, 9.13825195e+03,
        2.85288794e+03, 6.01369873e+02, 5.63865967e+02, 5.25845886e+02,
        6.55010071e+02, 2.61344678e+03, 6.53163391e+02, 2.89681152e+03,
        5.08147766e+02, 5.37070129e+02, 5.30136963e+02, 5.40953613e+02,
        1.04758435e+03, 1.13702656e+04, 2.51816504e+03, 7.34615356e+02,
        1.33930908e+03, 7.53655396e+02, 5.39131836e+03, 7.71412109e+02,
        9.96350525e+02, 2.04202500e+04, 1.33184326e+03, 7.93396240e+02,
        6.52877502e+02, 6.67353516e+02, 9.32512878e+02, 1.21413037e+03,
        3.08106128e+03, 8.20862610e+02, 4.97058350e+03, 3.47244189e+03,
        3.90806885e+03, 2.10523633e+03, 7.99178406e+02, 9.27055054e+02,
        9.25933350e+02, 6.66753418e+02, 2.47814893e+03, 1.03457568e+03,
        1.27286475e+03, 9.09113586e+02, 2.76232764e+03, 6.17919983e+02,
        1.39982996e+03, 7.48855347e+02, 1.86425215e+04, 1.34548083e+03,
        8.48067993e+02, 8.47590759e+02, 2.18355591e+03, 8.71240112e+02,
        2.37600171e+03, 2.86484082e+03, 2.46044678e+03, 2.55342822e+03,
        1.93025410e+04, 5.86043408e+03, 3.44345776e+03, 9.13646484e+02,
        2.39118433e+03, 6.55146045e+03, 3.79227661e+03, 9.36828979e+02,
        7.08693115e+02, 1.27757593e+03, 5.36915918e+03, 2.17958203e+03,
        6.16471438e+05, 2.92570825e+03, 1.40048328e+03, 1.81743156e+05,
        1.24000127e+04, 2.35348145e+04, 6.29400000e+03, 2.47444434e+03,
        6.87479919e+02, 3.31169507e+03, 3.61198555e+04, 8.81428418e+03,
        3.53371729e+03, 7.53081604e+02, 8.72993225e+02, 9.31250244e+02,
        6.41469102e+04, 1.25368887e+04, 4.70029766e+04, 9.12571191e+03,
        3.71725122e+03, 6.76778076e+02, 2.43835083e+03, 7.69159424e+02,
        9.80107056e+02, 7.98876587e+02, 1.19659998e+03, 2.22035181e+03,
        6.87959290e+02, 6.06202393e+03, 3.19228750e+04, 9.63974219e+03,
        7.56964783e+02, 4.68911816e+03, 1.28145557e+03, 3.79895337e+03,
        3.51602295e+03, 3.26391479e+03, 1.34672217e+03, 7.17988159e+02,
        9.34804932e+02, 7.24590088e+02, 1.03818188e+03])}

mod_select = np.isclose(psm["mod_masses"], mod_mass)
nmods = np.sum(mod_select)

# Gather other modifications into aux mods
aux_mod_pos = np.array(psm["mod_positions"])[~mod_select].astype(np.uint32)
aux_mod_masses = np.array(psm["mod_masses"])[~mod_select].astype(np.float32)

# Run scoring algorithm
ascore.score(mz_arr = spectrum["mz_values"],
                int_arr = spectrum["intensity_values"],
                peptide = psm["peptide"],
                n_of_mod = np.sum(mod_select),
                max_fragment_charge = psm["charge_state"] - 1,
                aux_mod_pos = aux_mod_pos,
                aux_mod_mass = aux_mod_masses)

print({"scan" : psm["scan"],"localized_peptide" : ascore.best_sequence,"pepscore" : ascore.best_score,"ascores" : ";".join([str(s) for s in ascore.ascores])})

### output data
{'scan': 3287, 'localized_peptide': 'DLNKK[271]APR', 'pepscore': 107.95252990722656, 'ascores': '17.022285'}