Errors found in notebook/github repo

1) Replace the link MSV000082312 with MSV000082312

2) The GNPS URL files are currently incompatible with the notebook, so they should probably be commented out. The reason is that their metadata table is quite different. Examples where the code crashes if the user retrieves the URL files:

InsideLevels(md[,2:12]) #excluding columns: filename, timerun, injection order

There are not enough columns to reach column 12. Replace with: InsideLevels(md[,2:4])

Replace:

# visualising the first 2 principal coordinates labelled with sample type
ggplot(pcoa_pts, aes(x = PCoA1, y = PCoA2,color = md$`ATTRIBUTE_Sample.Type`))+
geom_point(size=2.5) + labs(color ="Sample Type") +
ggtitle("Scores plot")+ xlab(paste('PCo1',var[1],'%', sep = ' ')) + ylab(paste('PCo2',var[2],'%', sep = ' '))

with (I also corrected some spelling errors):

# visualizing the first 2 principal coordinates labeled with sample type
ggplot(pcoa_pts, aes(x = PCoA1, y = PCoA2,color = md$`ATTRIBUTE_type`))+
geom_point(size=2.5) + labs(color ="Sample Type") + 
ggtitle("Scores plot")+ xlab(paste('PCo1',var[1],'%', sep = ' ')) + ylab(paste('PCo2',var[2],'%', sep = ' '))

This gives the correct coloring according to the sample type in the md table.

Replace:

# selecting only the filename & batch info column along with all feature intensity columns
ft_merged2 <- ft_merged %>% select(`filename`,`ATTRIBUTE_Batch`,starts_with("X")) 
head(ft_merged2,n=2)

with:

# selecting only the filename & batch info column along with all feature intensity columns
ft_merged2 <- ft_merged %>% select(`filename`,`ATTRIBUTE_Sample_Site`,starts_with("X")) 
head(ft_merged2,n=2)

Replace:

bm <- ft_merged2[,-1] %>%  #excluding filename column as we are geting only batchwise mean value
group_by(`ATTRIBUTE_Batch`) %>%  # grouping them by Batch
summarise_all(mean) %>% # getting column-wise mean
column_to_rownames('ATTRIBUTE_Batch') %>%
as.data.frame() # storing it as dataframe

with:

bm <- ft_merged2[,-1] %>%  #excluding filename column as we are geting only batchwise mean value
group_by(`ATTRIBUTE_Sample_Site`) %>%  # grouping them by Batch
summarise_all(mean) %>% # getting column-wise mean
column_to_rownames('ATTRIBUTE_Sample_Site') %>%
as.data.frame() # storing it as dataframe

Replace:

batch_df <- ft_merged2 %>%
group_split(`ATTRIBUTE_Batch`) %>% #group_split splits & stores the batchwise info as individual dataframes inside a list
lapply(., function(x) { # lapply applies the below function to each element within the list created by the previous step 
    x <- column_to_rownames(x,'filename') # then, we make "filename" as the rownames of each dataframe within the list
}) 

sapply(batch_df, dim) # gives the dimension of each list element columnwise.

with:

batch_df <- ft_merged2 %>%
group_split(`ATTRIBUTE_Sample_Site`) %>% #group_split splits & stores the batchwise info as individual dataframes inside a list
lapply(., function(x) { # lapply applies the below function to each element within the list created by the previous step 
    x <- column_to_rownames(x,'filename') # then, we make "filename" as the rownames of each dataframe within the list
}) 

sapply(batch_df, dim) # gives the dimension of each list element columnwise.

ggplot produces an error:

ggplot(ft_merged, 
   aes(x=`ATTRIBUTE_Injection_order`, 
       y=`X7683_205.097_2.569`)) + #paste the y axis name from the previous cell output
geom_point(size=2.5, alpha=0.9, 
       aes(color=as.factor(`ATTRIBUTE_Batch`), 
           shape = `ATTRIBUTE_Sample.Type`)) +
geom_smooth(method = 'lm',na.rm = T) +  # to add a trend line
scale_y_continuous(labels = scales::scientific,
               limits=c(-1e6,max(ft_merged$`X7683_205.097_2.569`)))

I tried to replace it with:

ggplot(ft_merged, 
       aes(x=`ATTRIBUTE_date`, 
           y=`X7683_205.097_2.569`)) + #paste the y axis name from the previous cell output
geom_point(size=2.5, alpha=0.9, 
           aes(color=as.factor(`ATTRIBUTE_Sample_Site`), 
               shape = `ATTRIBUTE_type`)) +
geom_smooth(method = 'lm',na.rm = T) +  # to add a trend line
scale_y_continuous(labels = scales::scientific,
                   limits=c(-1e6,max(ft_merged$`X7683_205.097_2.569`)))

But it doesn't work...

Replace:

InsideLevels(new_md[2:10]) and

#Getting the blank based on the metadata
md_Blank <- new_md %>% filter(`ATTRIBUTE_Sample.Type` == "Blank") #filtering the rows from metadata with the condition = blank
Blank <- ft_t[which(rownames(ft_t) %in% (md_Blank$`filename`)),,drop=F] #getting the corresponding rows from ft_t

head(Blank,n=2)
dim(Blank)

with: InsideLevels(new_md[2:4]) and

#Getting the blank based on the metadata
md_Blank <- new_md %>% filter(`ATTRIBUTE_type` == "Bl") #filtering the rows from metadata with the condition = blank
Blank <- ft_t[which(rownames(ft_t) %in% (md_Blank$`filename`)),,drop=F] #getting the corresponding rows from ft_t

head(Blank,n=2)
dim(Blank)

(This goes on for the whole notebook.)

Functional-Metabolomics-Lab / FBMN-STATS

Errors found in notebook/github repo #24