Open Jfriasaldeguer opened 5 years ago
Hi, Have you figured out how to plot clusters/states together with the heatmap? I tried using add_annotation_col in the plot_pseudotime_heatmap function but kept getting this error:
Error in if (nrow(add_annotation_col) != 100) { : argument is of length zero
I checked in pData and there is a column specifying the cluster. Thank you so much, D
I also came up with this question. Could anyone give any suggestion?
I have the same issue.
Just wanted to add a me too - I've been trying for a day to figure out how show how the heatmap compares with clusters and so far nothing has worked.
Same to me too.
I had the same issue as I wanted to annotate my heatmap with State. I wanted also to use subset of cells for heatmap.
This is how I solved it in my case and hope it can help.
Binner <- function(cells_subset){
df <- data.frame(pData(cds_object[,cells_subset]))
df <- df[,c("Pseudotime", "State")]
df <- df[order(df$Pseudotime, decreasing = F),]
len <- length(df$Pseudotime)
bin<-round(len/100)
State <- c()
value <- c()
for(i in 0:99){
if(i < 99){
start <- 1+(bin*i)
stop <- bin+(bin*i)
value <- median(as.numeric(as.vector(df$State[c(start:stop)])))
State <- c(State, value)
}
else{
State <- c(State, value)
}
}
return(as.data.frame(State))
}
As plot_pseudotime_heatmap used "genSmoothCurves" to create 100 bins for the heatmap, I did the same after ordering cell by pseudotime to take "median State" for each bin of cells. You then just need to run:
bin <- binner(mycells)
plot_pseudotime_heatmap(cds_object[mygenes,mycells],
cluster_rows = T,
cores = 1, add_annotation_col = bin,
show_rownames = F, return_heatmap = T, num_clusters = 5)
Then for me, I obtained expected annotation on the heatmap.
Sorry, I code as I think. Feel free to improve ^^
Try this function, you can adjust it according to your analysis
library(monocle)
library(tidyverse)
library(pheatmap)
my_branchplot <- function(cds_subset,
branch_point = 1,
branch_states = NULL,
branch_labels = c("Cell fate 1", "Cell fate 2"),
cluster_rows = TRUE,
hclust_method = "ward.D2",
num_clusters = 6,
hmcols = NULL,
branch_colors = c("#979797", "#F05662", "#7990C8"),
add_annotation_row = NULL,
add_annotation_col = NULL,
show_rownames = FALSE,
use_gene_short_name = TRUE,
scale_max = 3,
scale_min = -3,
norm_method = c("log", "vstExprs"),
trend_formula = "~sm.ns(Pseudotime, df=3) * Branch",
return_heatmap = FALSE,
cores = 1, ...)
{
pData(cds_subset)$Pseudotime <- (pData(cds_subset)$Pseudotime*100)/max(pData(cds_subset)$Pseudotime)
new_cds <- buildBranchCellDataSet(cds_subset,
branch_states = branch_states,
branch_point = branch_point,
progenitor_method = "duplicate")
new_cds@dispFitInfo <- cds_subset@dispFitInfo
if (is.null(branch_states)) {
progenitor_state <- subset(pData(cds_subset), Pseudotime ==
0)[, "State"]
branch_states <- setdiff(pData(cds_subset)$State, progenitor_state)
}
new_cds.pdata <- pData(new_cds)
new_cds.pdata$Cell_ID <- rownames(new_cds.pdata)
col_gap_ind <- table(new_cds.pdata$Branch)[[1]]
pseudotime.A <- new_cds.pdata %>%
filter(Branch==unique(as.character(pData(new_cds)$Branch))[1]) %>%
select(Cell_ID,Pseudotime)
pseudotime.A <- pseudotime.A[order(pseudotime.A$Pseudotime),]
pseudotime.B <- new_cds.pdata %>%
filter(Branch==unique(as.character(pData(new_cds)$Branch))[2]) %>%
select(Cell_ID,Pseudotime)
pseudotime.B <- pseudotime.B[order(pseudotime.B$Pseudotime),]
newdataA <- data.frame(Pseudotime = pseudotime.A$Pseudotime,
Branch = as.factor(unique(as.character(pData(new_cds)$Branch))[1]))
newdataB <- data.frame(Pseudotime = pseudotime.B$Pseudotime,
Branch = as.factor(unique(as.character(pData(new_cds)$Branch))[2]))
BranchAB_exprs <- genSmoothCurves(new_cds[, ],
cores = cores,
trend_formula = trend_formula,
relative_expr = T,
new_data = rbind(newdataA,newdataB))
colnames(BranchAB_exprs) <- c(pseudotime.A$Cell_ID,pseudotime.B$Cell_ID)
BranchA_exprs <- BranchAB_exprs[, 1:col_gap_ind]
BranchB_exprs <- BranchAB_exprs[, (col_gap_ind+1):ncol(BranchAB_exprs)]
norm_method <- match.arg(norm_method)
if (norm_method == "vstExprs") {
BranchA_exprs <- vstExprs(new_cds, expr_matrix = BranchA_exprs)
BranchB_exprs <- vstExprs(new_cds, expr_matrix = BranchB_exprs)
}
else if (norm_method == "log") {
BranchA_exprs <- log10(BranchA_exprs + 1)
BranchB_exprs <- log10(BranchB_exprs + 1)
}
heatmap_matrix <- cbind(BranchA_exprs[, (col_gap_ind - 1):1],
BranchB_exprs)
heatmap_matrix = heatmap_matrix[!apply(heatmap_matrix, 1,
sd) == 0, ]
heatmap_matrix = Matrix::t(scale(Matrix::t(heatmap_matrix),
center = TRUE))
heatmap_matrix = heatmap_matrix[is.na(row.names(heatmap_matrix)) ==
FALSE, ]
heatmap_matrix[is.nan(heatmap_matrix)] = 0
heatmap_matrix[heatmap_matrix > scale_max] = scale_max
heatmap_matrix[heatmap_matrix < scale_min] = scale_min
heatmap_matrix_ori <- heatmap_matrix
heatmap_matrix <- heatmap_matrix[is.finite(heatmap_matrix[,
1]) & is.finite(heatmap_matrix[, col_gap_ind]), ]
row_dist <- as.dist((1 - cor(Matrix::t(heatmap_matrix)))/2)
row_dist[is.na(row_dist)] <- 1
# exp_rng <- range(heatmap_matrix)
# bks <- seq(exp_rng[1] - 0.1, exp_rng[2] + 0.1, by = 0.1)
# if (is.null(hmcols)) {
# hmcols <- blue2green2red(length(bks) - 1)
# }
df <- new_cds.pdata[-col_gap_ind,]
df <- df %>%
select(Time,State,Branch,Pseudotime)
###only clustering not plot
ph <- pheatmap(heatmap_matrix,
useRaster = T,
cluster_cols = FALSE,
cluster_rows = TRUE,
show_rownames = F,
show_colnames = F,
annotation_col = df,
gaps_col = col_gap_ind,
clustering_distance_rows = row_dist,
clustering_method = hclust_method,
cutree_rows = num_clusters,
silent = F, ###not plot
filename = NA,
#breaks = bks,
color = hmcols)
annotation_row <- data.frame(Cluster = factor(cutree(ph$tree_row,
num_clusters)))
pheatmap(heatmap_matrix,
useRaster = T,
cluster_cols = FALSE,
cluster_rows = TRUE,
show_rownames = F,
show_colnames = F,
annotation_col = df,
annotation_row = annotation_row,
gaps_col = col_gap_ind,
clustering_distance_rows = row_dist,
clustering_method = hclust_method,
cutree_rows = num_clusters,
silent = F, ###not plot
filename = NA,
#breaks = bks,
color = hmcols)
}
### test it if it worked
cold <- colorRampPalette(c('#f7fcf0','#41b6c4','#253494','#081d58','#081d58'))
warm <- colorRampPalette(c('#ffffb2','#fecc5c','#e31a1c','#800026','#800026'))
mypalette <- c(rev(cold(21)), warm(20))
lung <- load_lung()
my_branchplot(cds_subset = lung,
branch_point = 1,
hmcols = mypalette)
I had the same issue as I wanted to annotate my heatmap with State. I wanted also to use subset of cells for heatmap.
This is how I solved it in my case and hope it can help.
Binner <- function(cells_subset){ df <- data.frame(pData(cds_object[,cells_subset])) df <- df[,c("Pseudotime", "State")] df <- df[order(df$Pseudotime, decreasing = F),] len <- length(df$Pseudotime) bin<-round(len/100) State <- c() value <- c() for(i in 0:99){ if(i < 99){ start <- 1+(bin*i) stop <- bin+(bin*i) value <- median(as.numeric(as.vector(df$State[c(start:stop)]))) State <- c(State, value) } else{ State <- c(State, value) } } return(as.data.frame(State)) }
As plot_pseudotime_heatmap used "genSmoothCurves" to create 100 bins for the heatmap, I did the same after ordering cell by pseudotime to take "median State" for each bin of cells. You then just need to run:
bin <- binner(mycells) plot_pseudotime_heatmap(cds_object[mygenes,mycells], cluster_rows = T, cores = 1, add_annotation_col = bin, show_rownames = F, return_heatmap = T, num_clusters = 5)
Then for me, I obtained expected annotation on the heatmap.
Sorry, I code as I think. Feel free to improve ^^
@arnlav ,thanks for your comments,I have tried it ,it worked,but the legend of "State" only show 4 color-block and column bar also only show 4 colors,Do you have any good suggestions to slove it ?
Thanks.
Hi,
I am trying to add an annotation column on the plot_pseudotime_heatmap function in order to link each plot column to a cluster or state. However I get this error:
Error in plot_pseudotime_heatmap(HSMMb[sig_gene_names, ], num_clusters = 1, : add_annotation_col should have only 100 rows (check genSmoothCurves before you supply the annotation data)!
I understand the pseudotime heatmap is binned to 100 columns, is there a way I can avoid such binning or a way I can manage to plot those clusters/states together with the heatmap?
Thanks a lot in advance! J