cpanse / uvpd

Ultra HRMS in combination with UVPD fragmentation for enhanced structural identification of organic micropollutants
https://doi.org/10.3390/molecules25184189
0 stars 0 forks source link

How to extract the peaklist? - in-silico `frag.generateFragments(molecule, 1)` result is huge #7

Closed cpanse closed 5 years ago

cpanse commented 5 years ago
library(metfRag)

# SMILES string of the molecule to be fragmented in silico.
smiles <- "CC(C)(C)C(O)C(OC1=CC=C(Cl)C=C1)N1C=NC=N1"

# Only the first element of the parse.smiles() result is kept.
molecule <- parse.smiles(smiles)[[1L]]

# Generate the fragments of the molecule (second argument: 1).
fragments <- frag.generateFragments(molecule, 1)

# How many fragments were generated?
length(fragments)

Looking at the first result, the question arises: how to extract the peaklist?


R> fragments[[1]]
[1] "Java-Object{AtomContainer(2077528955, #A:20, Atom(1207608476, S:C, H:1, AtomType(1207608476, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1207608476, Element(1207608476, S:C, ID:a5, AN:6)))), Atom(842179210, S:C, H:1, AtomType(842179210, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(842179210, Element(842179210, S:C, ID:a3, AN:6)))), Atom(686989583, S:C, H:1, AtomType(686989583, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(686989583, Element(686989583, S:C, ID:a6, AN:6)))), Atom(1944201789, S:C, H:1, AtomType(1944201789, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1944201789, Element(1944201789, S:C, ID:a4, AN:6)))), Atom(2146338580, S:C, H:0, AtomType(2146338580, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(2146338580, Element(2146338580, S:C, ID:a9, AN:6)))), Atom(1110031167, S:C, H:0, AtomType(1110031167, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1110031167, Element(1110031167, S:C, ID:a10, AN:6)))), Atom(730923082, S:C, H:1, AtomType(730923082, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(730923082, Element(730923082, S:C, ID:a12, AN:6)))), Atom(1456339771, S:C, H:1, AtomType(1456339771, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1456339771, Element(1456339771, S:C, ID:a11, AN:6)))), Atom(550302731, S:C, H:0, AtomType(550302731, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(550302731, Element(550302731, S:C, ID:a13, AN:6)))), Atom(854487022, S:C, H:3, AtomType(854487022, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(854487022, Element(854487022, S:C, ID:a0, AN:6)))), Atom(1292838001, S:C, H:3, AtomType(1292838001, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1292838001, Element(1292838001, S:C, ID:a1, AN:6)))), Atom(1899223686, S:C, H:3, AtomType(1899223686, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1899223686, Element(1899223686, S:C, 
ID:a2, AN:6)))), Atom(800281454, S:Cl, H:0, AtomType(800281454, N:Cl, MBO:SINGLE, BOS:1.0, FC:0, H:SP3, NC:1, EV:1, Isotope(800281454, Element(800281454, S:Cl, ID:a14, AN:17)))), Atom(379478400, S:N, H:0, AtomType(379478400, N:N.sp2, MBO:DOUBLE, BOS:3.0, FC:0, H:SP2, NC:2, EV:3, Isotope(379478400, Element(379478400, S:N, ID:a15, AN:7)))), Atom(259219561, S:C, H:1, AtomType(259219561, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(259219561, Element(259219561, S:C, ID:a7, AN:6)))), Atom(1839337592, S:C, H:1, AtomType(1839337592, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1839337592, Element(1839337592, S:C, ID:a8, AN:6)))), Atom(900636745, S:N, H:0, AtomType(900636745, N:N.sp2, MBO:DOUBLE, BOS:3.0, FC:0, H:SP2, NC:2, EV:3, Isotope(900636745, Element(900636745, S:N, ID:a16, AN:7)))), Atom(1912960603, S:N, H:0, AtomType(1912960603, N:N.planar3, MBO:SINGLE, BOS:3.0, FC:0, H:PLANAR3, NC:3, EV:3, Isotope(1912960603, Element(1912960603, S:N, ID:a17, AN:7)))), Atom(1201173334, S:O, H:1, AtomType(1201173334, N:O.sp3, MBO:SINGLE, BOS:2.0, FC:0, H:SP3, NC:2, EV:2, Isotope(1201173334, Element(1201173334, S:O, ID:a18, AN:8)))), Atom(586127428, S:O, H:0, AtomType(586127428, N:O.sp3, MBO:SINGLE, BOS:2.0, FC:0, H:SP3, NC:2, EV:2, Isotope(586127428, Element(586127428, S:O, ID:a19, AN:8)))), #B:21, Bond(1652807864, #O:SINGLE, #S:NONE, #A:2, Atom(1207608476, S:C, H:1, AtomType(1207608476, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1207608476, Element(1207608476, S:C, ID:a5, AN:6)))), Atom(842179210, S:C, H:1, AtomType(842179210, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(842179210, Element(842179210, S:C, ID:a3, AN:6)))), ElectronContainer(1652807864EC:2)), Bond(1628998132, #O:DOUBLE, #S:NONE, #A:2, Atom(686989583, S:C, H:1, AtomType(686989583, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(686989583, Element(686989583, S:C, ID:a6, AN:6)))), Atom(1944201789, S:C, H:1, AtomType(1944201789, 
N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1944201789, Element(1944201789, S:C, ID:a4, AN:6)))), ElectronContainer(1628998132EC:4)), Bond(1223850219, #O:DOUBLE, #S:NONE, #A:2, Atom(2146338580, S:C, H:0, AtomType(2146338580, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(2146338580, Element(2146338580, S:C, ID:a9, AN:6)))), Atom(842179210, S:C, H:1, AtomType(842179210, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(842179210, Element(842179210, S:C, ID:a3, AN:6)))), ElectronContainer(1223850219EC:4)), Bond(497208183, #O:SINGLE, #S:NONE, #A:2, Atom(2146338580, S:C, H:0, AtomType(2146338580, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(2146338580, Element(2146338580, S:C, ID:a9, AN:6)))), Atom(1944201789, S:C, H:1, AtomType(1944201789, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1944201789, Element(1944201789, S:C, ID:a4, AN:6)))), ElectronContainer(497208183EC:2)), Bond(997055773, #O:DOUBLE, #S:NONE, #A:2, Atom(1110031167, S:C, H:0, AtomType(1110031167, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1110031167, Element(1110031167, S:C, ID:a10, AN:6)))), Atom(1207608476, S:C, H:1, AtomType(1207608476, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1207608476, Element(1207608476, S:C, ID:a5, AN:6)))), ElectronContainer(997055773EC:4)), Bond(1063980005, #O:SINGLE, #S:NONE, #A:2, Atom(1110031167, S:C, H:0, AtomType(1110031167, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1110031167, Element(1110031167, S:C, ID:a10, AN:6)))), Atom(686989583, S:C, H:1, AtomType(686989583, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(686989583, Element(686989583, S:C, ID:a6, AN:6)))), ElectronContainer(1063980005EC:2)), Bond(35534346, #O:SINGLE, #S:NONE, #A:2, Atom(730923082, S:C, H:1, AtomType(730923082, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(730923082, Element(730923082, S:C, ID:a12, AN:6)))), Atom(1456339771, 
S:C, H:1, AtomType(1456339771, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1456339771, Element(1456339771, S:C, ID:a11, AN:6)))), ElectronContainer(35534346EC:2)), Bond(1537471098, #O:SINGLE, #S:NONE, #A:2, Atom(550302731, S:C, H:0, AtomType(550302731, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(550302731, Element(550302731, S:C, ID:a13, AN:6)))), Atom(854487022, S:C, H:3, AtomType(854487022, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(854487022, Element(854487022, S:C, ID:a0, AN:6)))), ElectronContainer(1537471098EC:2)), Bond(1490509465, #O:SINGLE, #S:NONE, #A:2, Atom(550302731, S:C, H:0, AtomType(550302731, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(550302731, Element(550302731, S:C, ID:a13, AN:6)))), Atom(1292838001, S:C, H:3, AtomType(1292838001, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1292838001, Element(1292838001, S:C, ID:a1, AN:6)))), ElectronContainer(1490509465EC:2)), Bond(122114483, #O:SINGLE, #S:NONE, #A:2, Atom(550302731, S:C, H:0, AtomType(550302731, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(550302731, Element(550302731, S:C, ID:a13, AN:6)))), Atom(1899223686, S:C, H:3, AtomType(1899223686, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1899223686, Element(1899223686, S:C, ID:a2, AN:6)))), ElectronContainer(122114483EC:2)), Bond(1947896119, #O:SINGLE, #S:NONE, #A:2, Atom(550302731, S:C, H:0, AtomType(550302731, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(550302731, Element(550302731, S:C, ID:a13, AN:6)))), Atom(1456339771, S:C, H:1, AtomType(1456339771, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1456339771, Element(1456339771, S:C, ID:a11, AN:6)))), ElectronContainer(1947896119EC:2)), Bond(812553708, #O:SINGLE, #S:NONE, #A:2, Atom(800281454, S:Cl, H:0, AtomType(800281454, N:Cl, MBO:SINGLE, BOS:1.0, FC:0, H:SP3, NC:1, EV:1, Isotope(800281454, Element(800281454, S:Cl, ID:a14, 
AN:17)))), Atom(2146338580, S:C, H:0, AtomType(2146338580, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(2146338580, Element(2146338580, S:C, ID:a9, AN:6)))), ElectronContainer(812553708EC:2)), Bond(790722099, #O:SINGLE, #S:NONE, #A:2, Atom(379478400, S:N, H:0, AtomType(379478400, N:N.sp2, MBO:DOUBLE, BOS:3.0, FC:0, H:SP2, NC:2, EV:3, Isotope(379478400, Element(379478400, S:N, ID:a15, AN:7)))), Atom(259219561, S:C, H:1, AtomType(259219561, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(259219561, Element(259219561, S:C, ID:a7, AN:6)))), ElectronContainer(790722099EC:2)), Bond(173214986, #O:DOUBLE, #S:NONE, #A:2, Atom(379478400, S:N, H:0, AtomType(379478400, N:N.sp2, MBO:DOUBLE, BOS:3.0, FC:0, H:SP2, NC:2, EV:3, Isotope(379478400, Element(379478400, S:N, ID:a15, AN:7)))), Atom(1839337592, S:C, H:1, AtomType(1839337592, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1839337592, Element(1839337592, S:C, ID:a8, AN:6)))), ElectronContainer(173214986EC:4)), Bond(2122049087, #O:DOUBLE, #S:NONE, #A:2, Atom(900636745, S:N, H:0, AtomType(900636745, N:N.sp2, MBO:DOUBLE, BOS:3.0, FC:0, H:SP2, NC:2, EV:3, Isotope(900636745, Element(900636745, S:N, ID:a16, AN:7)))), Atom(259219561, S:C, H:1, AtomType(259219561, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(259219561, Element(259219561, S:C, ID:a7, AN:6)))), ElectronContainer(2122049087EC:4)), Bond(27362884, #O:SINGLE, #S:NONE, #A:2, Atom(1912960603, S:N, H:0, AtomType(1912960603, N:N.planar3, MBO:SINGLE, BOS:3.0, FC:0, H:PLANAR3, NC:3, EV:3, Isotope(1912960603, Element(1912960603, S:N, ID:a17, AN:7)))), Atom(1839337592, S:C, H:1, AtomType(1839337592, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1839337592, Element(1839337592, S:C, ID:a8, AN:6)))), ElectronContainer(27362884EC:2)), Bond(1387210478, #O:SINGLE, #S:NONE, #A:2, Atom(1912960603, S:N, H:0, AtomType(1912960603, N:N.planar3, MBO:SINGLE, BOS:3.0, FC:0, H:PLANAR3, NC:3, EV:3, 
Isotope(1912960603, Element(1912960603, S:N, ID:a17, AN:7)))), Atom(730923082, S:C, H:1, AtomType(730923082, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(730923082, Element(730923082, S:C, ID:a12, AN:6)))), ElectronContainer(1387210478EC:2)), Bond(1523553211, #O:SINGLE, #S:NONE, #A:2, Atom(1912960603, S:N, H:0, AtomType(1912960603, N:N.planar3, MBO:SINGLE, BOS:3.0, FC:0, H:PLANAR3, NC:3, EV:3, Isotope(1912960603, Element(1912960603, S:N, ID:a17, AN:7)))), Atom(900636745, S:N, H:0, AtomType(900636745, N:N.sp2, MBO:DOUBLE, BOS:3.0, FC:0, H:SP2, NC:2, EV:3, Isotope(900636745, Element(900636745, S:N, ID:a16, AN:7)))), ElectronContainer(1523553211EC:2)), Bond(1876443073, #O:SINGLE, #S:NONE, #A:2, Atom(1201173334, S:O, H:1, AtomType(1201173334, N:O.sp3, MBO:SINGLE, BOS:2.0, FC:0, H:SP3, NC:2, EV:2, Isotope(1201173334, Element(1201173334, S:O, ID:a18, AN:8)))), Atom(1456339771, S:C, H:1, AtomType(1456339771, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(1456339771, Element(1456339771, S:C, ID:a11, AN:6)))), ElectronContainer(1876443073EC:2)), Bond(1328238652, #O:SINGLE, #S:NONE, #A:2, Atom(586127428, S:O, H:0, AtomType(586127428, N:O.sp3, MBO:SINGLE, BOS:2.0, FC:0, H:SP3, NC:2, EV:2, Isotope(586127428, Element(586127428, S:O, ID:a19, AN:8)))), Atom(1110031167, S:C, H:0, AtomType(1110031167, N:C.sp2, MBO:DOUBLE, BOS:4.0, FC:0, H:SP2, NC:3, EV:4, Isotope(1110031167, Element(1110031167, S:C, ID:a10, AN:6)))), ElectronContainer(1328238652EC:2)), Bond(195984832, #O:SINGLE, #S:NONE, #A:2, Atom(586127428, S:O, H:0, AtomType(586127428, N:O.sp3, MBO:SINGLE, BOS:2.0, FC:0, H:SP3, NC:2, EV:2, Isotope(586127428, Element(586127428, S:O, ID:a19, AN:8)))), Atom(730923082, S:C, H:1, AtomType(730923082, N:C.sp3, MBO:SINGLE, BOS:4.0, FC:0, H:SP3, NC:4, EV:4, Isotope(73092308
cpanse commented 5 years ago
# Call do.aromaticity(), do.typing() and do.isotopes() on every fragment
# before its mass is queried. Each call is wrapped in try() so that a failure
# on one fragment is reported but does not abort the loop (best effort).
for (fragment in fragments) {
  try(do.aromaticity(fragment))
  try(do.typing(fragment))
  try(do.isotopes(fragment))
}

# Exact mass of every fragment. vapply() always yields a numeric vector
# (numeric(0) for an empty fragment list), whereas sapply() would return
# list() in that case.
vapply(fragments, rcdk::get.exact.mass, numeric(1))

# Exact mass of the 101st fragment.
rcdk::get.exact.mass(fragments[[101]])

[1] 278.106

# Print the SMILES string of the 101st fragment.
rcdk::get.smiles(fragments[[101]])

"C([CH])=C(C=C[C]Cl)OC([CH]C(C)(C)C)N1C=NC=N1"

cpanse commented 5 years ago
# Plot how many fragments share each exact mass.
# vapply() keeps the mass vector numeric even when `fragments` is empty;
# explicit axis labels replace the deparsed-expression default label.
fragment_masses <- vapply(fragments, rcdk::get.exact.mass, numeric(1))
plot(
  table(fragment_masses),
  xlab = "Exact mass",
  ylab = "Number of fragments"
)
Screenshot 2019-03-26 at 14 44 06
cpanse commented 5 years ago
# Table of all in-silico fragments: one row per fragment with its exact mass
# shifted by 1.0072 (column MH1P) and its SMILES string (column SMILES).
# The original name `in-silico.fragments` is not a valid R identifier (`in` is
# a reserved word and `-` parses as minus), so a syntactic name is used.
# NOTE(review): 1.0072 is presumably the proton mass used to obtain [M+H]+;
# confirm the intended precision.
fragment_masses <- vapply(fragments, rcdk::get.exact.mass, numeric(1))
fragment_smiles <- vapply(fragments, rcdk::get.smiles, character(1))
in_silico_fragments <- data.frame(
  MH1P = fragment_masses + 1.0072,
  SMILES = fragment_smiles,
  # Keep SMILES as character on every R version (the default changed in 4.0).
  stringsAsFactors = FALSE
)