Open yguitton opened 6 years ago
Here is a first version of the code: a modified read.msp function. The idea is then to use the metaMS write.msp function to write the result back out.
# Read an MSP mass-spectral library file into a list of compounds.
#
# Compared with the stock metaMS reader, this version accepts field names in
# any case for "Name" / "Num Peaks" and two peak layouts:
#   * "mz int; mz int; ..."      (NIST / metaMS style)
#   * "(mz int) (mz int) ..."    (AMDIS style)
#
# Arguments:
#   file      Path (or connection) of the MSP file, passed to scan().
#   only.org  If TRUE, keep only compounds whose "Formula:" line consists
#             solely of the elements in org.set (plus digits). If the file
#             has no "Formula:" lines at all, nothing is filtered.
#   org.set   Single-letter element symbols regarded as "organic".
#   noNumbers Regular expressions; header fields whose name matches any of
#             them are kept as character. All other fields are converted to
#             numeric when the value parses as a number. NULL selects a
#             built-in default set.
#
# Value:
#   A list with one element per compound. Each element is a list of the
#   header fields (NAME -> Name, RT -> rt, RI -> std.RI, CASNO -> CAS) plus
#   `pspectrum`, a two-column (mz, intensity) matrix; when duplicate masses
#   are summed it is the data.frame returned by aggregate().
#
# Errors:
#   Stops when no compound is found, when a compound has no "Num Peaks"
#   field, or when the number of parsed peaks differs from "Num Peaks".
read.msp <- function(file, only.org = FALSE,
                     org.set = c("C", "H", "D", "N", "O", "P", "S"),
                     noNumbers = NULL) {
  if (is.null(noNumbers)) {
    # NOTE(review): the last two patterns were a single string
    # "[Ss][oO][Uu][Rr][Cc][Ee],RW" in the previous version, which looks like
    # a misplaced quote; they are treated as two separate patterns here.
    noNumbers <- c("[Nn][Aa][Mm][Ee]", "CAS?", "stdFile", "date",
                   "validated", "ChemspiderID", "SMILES", "InChI",
                   "Class", "[Cc][Oo][Mm][Mm][Ee][Nn][Tt]?", "csLinks",
                   "[fF][oO][Rr][Mm]?", "[Ss][oO][Uu][Rr][Cc][Ee]", "RW")
  }
  name.rx <- "^[Nn][Aa][Mm][Ee]:"
  npeaks.rx <- "[Nn][Uu][Mm] [Pp][Ee][Aa][Kk][Ss]"

  # Return the text following `field` in each element of x, with leading
  # spaces removed. `field` is a regular expression unless fixed = TRUE.
  # If any element does not split into exactly two pieces, either stop
  # (do.err = TRUE) or return NA.
  get.text.value <- function(x, field, do.err = TRUE, fixed = FALSE) {
    pieces <- strsplit(x, field, fixed = fixed)
    n.pieces <- lengths(pieces)
    if (all(n.pieces == 2)) {
      vapply(pieces, function(y) gsub("^ +", "", y[2]), character(1))
    } else if (do.err) {
      stop(paste("Invalid field", field, "in", x[n.pieces != 2]))
    } else {
      NA
    }
  }

  # Indices of the "Formula:" lines in strs that contain nothing but the
  # symbols in org.set, digits and whitespace.
  is.org <- function(strs, org.set) {
    formulas <- get.text.value(strs, "Formula:")
    org.class <- paste0("[", paste(org.set, collapse = ""), "[:space:]]")
    leftover <- gsub(org.class, "", formulas)
    # A digit-only (possibly empty) remainder means every symbol was in
    # org.set, so formulas without counts such as "CO" are accepted too.
    which(nzchar(formulas) & grepl("^[0-9]*$", leftover))
  }

  # Parse the lines of one compound (from its Name line up to the line
  # before the next Name line) into a list of header fields plus pspectrum.
  read.compound <- function(strs, noNumbers) {
    colon.lines <- grep(":", strs, fixed = TRUE)
    field.names <- vapply(strsplit(strs[colon.lines], ":", fixed = TRUE),
                          function(p) p[1], character(1))
    pk.pos <- grep(npeaks.rx, field.names)
    if (length(pk.pos) == 0)
      stop("No spectrum found")
    # Line number (in strs) of the Num Peaks field; everything before it is
    # header, everything after it is peak data.
    pk.line <- colon.lines[pk.pos[1]]
    hdr.pos <- which(colon.lines < pk.line)

    # Field names are matched literally so that names containing regex
    # metacharacters are still split correctly.
    cmpnd <- lapply(hdr.pos, function(i) {
      get.text.value(strs[colon.lines[i]], paste0(field.names[i], ":"),
                     do.err = FALSE, fixed = TRUE)
    })

    # Rename essential fields for metaMS. Whole-name matches only, so that
    # an existing "std.RI" is not turned into "std.std.RI".
    hdr.names <- field.names[hdr.pos]
    hdr.names <- sub("^[Nn][Aa][Mm][Ee]$", "Name", hdr.names)
    hdr.names <- sub("^RT$", "rt", hdr.names)
    hdr.names <- sub("^RI$", "std.RI", hdr.names)
    hdr.names <- sub("^CASNO$", "CAS", hdr.names)
    names(cmpnd) <- hdr.names

    # Convert every field NOT listed in noNumbers to numeric when possible.
    keep.char <- if (length(noNumbers) > 0) {
      grepl(paste(noNumbers, collapse = "|"), hdr.names)
    } else {
      rep(FALSE, length(hdr.names))
    }
    cmpnd[!keep.char] <- lapply(cmpnd[!keep.char], function(x) {
      y <- suppressWarnings(as.numeric(x))
      if (is.na(y)) x else y
    })

    nlines <- length(strs)
    npeaks <- as.numeric(get.text.value(strs[pk.line],
                                        paste0(npeaks.rx, ":")))
    peak.lines <- if (pk.line < nlines) {
      strs[(pk.line + 1):nlines]
    } else {
      character(0)
    }

    if (any(grepl(";", peak.lines, fixed = TRUE))) {
      # "mz int; mz int; ..." layout
      pks <- unlist(strsplit(peak.lines, ";", fixed = TRUE))
    } else {
      # "(mz int) (mz int) ..." layout, or one bare "mz int" pair per line
      pks <- unlist(strsplit(peak.lines, ")", fixed = TRUE))
      pks <- gsub("(", "", pks, fixed = TRUE)
    }
    pks <- trimws(as.character(pks))
    pks <- pks[pks != ""]
    if (is.na(npeaks) || length(pks) != npeaks)
      stop("Not the right number of peaks in compound ", cmpnd$Name)

    pklst <- strsplit(pks, "[[:space:]]+")
    pklst <- lapply(pklst, function(x) x[x != ""])
    if (any(lengths(pklst) < 2))
      stop("Malformed peak (expected 'mz intensity') in compound ",
           cmpnd$Name)
    cmz <- as.numeric(vapply(pklst, "[[", character(1), 1))
    cintens <- as.numeric(vapply(pklst, "[[", character(1), 2))
    finaltab <- matrix(c(cmz, cintens), ncol = 2)
    if (any(table(cmz) > 1)) {
      warning("Duplicate mass in compound ", cmpnd$Name,
              " (CAS ", cmpnd$CAS, ")... summing up intensities")
      finaltab <- aggregate(finaltab[, 2], by = list(finaltab[, 1]),
                            FUN = sum)
    }
    colnames(finaltab) <- c("mz", "intensity")
    # uncomment below to put NULL in remove cmpnd without RT
    # if(!is.na(match("rt",names(cmpnd))))
    c(cmpnd, list(pspectrum = finaltab))
  }

  huhn <- scan(file, what = "", sep = "\n", quiet = TRUE)
  starts <- grep(name.rx, huhn)
  if (length(starts) == 0)
    stop("No compound entries (lines starting with 'Name:') found")
  ends <- c(starts[-1] - 1, length(huhn))

  if (only.org) {
    formulas <- grep("^Formula:", huhn)
    if (length(formulas) > 0) {
      orgs <- is.org(huhn[formulas], org.set)
      # Map each organic Formula line back to the compound containing it,
      # so compounds lacking a Formula line cannot shift the selection.
      keep <- unique(findInterval(formulas[orgs], starts))
      keep <- keep[keep > 0]
      starts <- starts[keep]
      ends <- ends[keep]
    }
  }

  lapply(seq_along(starts), function(i) {
    read.compound(huhn[starts[i]:ends[i]], noNumbers = noNumbers)
  })
}
When you talk about the msp file, do you mean the database as input?
Yes, as some msp files for metals should contain some fields
Le mer. 26 juin 2019 09:19, Julien Saint-Vanne notifications@github.com a écrit :
When you talk about the msp file, do you mean the database as input?
— You are receiving this because you were assigned. Reply to this email directly, view it on GitHub https://github.com/workflow4metabolomics/metaMS/issues/23?email_source=notifications&email_token=ABI76KNAUDNRRGWACXEFHN3P4MKAVA5CNFSM4EN7OIOKYY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGODYSSZJQ#issuecomment-505752742, or mute the thread https://github.com/notifications/unsubscribe-auth/ABI76KNM756MKFLYEYENT2DP4MKAVANCNFSM4EN7OIOA .
Some msp files are not fully compatible with metaMS. For example, msp files from AMDIS use "(mz int)" instead of "mz int;" as the mass spectrum descriptor.
The idea for the wrapper is: (1) load the msp file with a new read.msp that can deal with more msp formats, then (2) use write.msp to create a converted msp file.