hdng / clonevol

Inferring and visualizing clonal evolution in multi-sample cancer sequencing
GNU General Public License v3.0
141 stars 45 forks source link

question about enumerating clonal architectures #27

Closed sszhang72 closed 5 years ago

sszhang72 commented 5 years ago

Hello,

I ran into a problem when running clonevol with 6 samples that have been divided into 11 clusters. It does not report any errors, but it has stayed in the state shown below for more than 24 hours. I wonder whether there is a limit on the number of clusters clonevol can handle.

There is no progress, only the following output:

There were 14 warnings (use warnings() to see them) null device 1 There were 14 warnings (use warnings() to see them) null device 1 null device 1 Sample 1: TD25 <-- TD25 Sample 2: TD26 <-- TD26 Sample 3: TD27 <-- TD27 Sample 4: TD28 <-- TD28 Sample 5: TD29 <-- TD29 Sample 6: TD30 <-- TD30 Using monoclonal model Note: all VAFs were divided by 100 to convert from percentage to proportion. Generating non-parametric boostrap samples... TD25 : Enumerating clonal architectures... Determining if cluster VAF is significantly positive... Exluding clusters whose VAF < min.cluster.vaf=0 Non-positive VAF clusters

The code and input file are as follows:

x=read.table("/data/quantumclone/sci_clonevol_140_0123_no123.tsv", header=TRUE, sep="\t") library(clonevol)

preparation

shorten vaf column names as they will be

vaf.col.names <- grep('.vaf', colnames(x), value=TRUE) sample.names <- gsub('.vaf', '', vaf.col.names) x[, sample.names] <- x[, vaf.col.names] vaf.col.names <- sample.names

prepare sample grouping

sample.groups <- c('1', '2', '3', '4', '5', '6'); names(sample.groups) <- vaf.col.names

setup the order of clusters to display in various plots (later)

x <- x[order(x$cluster),] clone.colors <- c('#999793', '#8d4891', '#f8e356', '#fe9536', '#d7352e', "#FF3030", '#FFE4E1', '#2F4F4F', '#191970', '#6495ED', '#7FFF00' )

clone.colors <- NULL

pdf('box.pdf', width = 3, height = 3, useDingbats = FALSE, title='') pp <- plot.variant.clusters(x, cluster.col.name = 'cluster', show.cluster.size = FALSE, cluster.size.text.color = 'blue', vaf.col.names = vaf.col.names, vaf.limits = 70, sample.title.size = 20, violin = FALSE, box = FALSE, jitter = TRUE, jitter.shape = 1, jitter.color = clone.colors, jitter.size = 3, jitter.alpha = 1, jitter.center.method = 'median', jitter.center.size = 1, jitter.center.color = 'darkgray', jitter.center.display.value = 'none', highlight = 'is.driver', highlight.shape = 21, highlight.color = 'blue', highlight.fill.color = 'green', highlight.note.col.name = 'gene', highlight.note.size = 2, order.by.total.vaf = FALSE) dev.off()

plot clusters pairwise-ly

plot.pairwise(x, col.names = vaf.col.names, out.prefix = 'variants.pairwise.plot', colors = clone.colors)

plot mean/median of clusters across samples (cluster flow)

pdf('flow.pdf', width=3, height=3, useDingbats=FALSE, title='') plot.cluster.flow(x, vaf.col.names = vaf.col.names, sample.names = c('1', '2', '3', '4', '5', '6'), colors = clone.colors) dev.off()

infer consensus clonal evolution trees

y = infer.clonal.models(variants = x, cluster.col.name = 'cluster', vaf.col.names = vaf.col.names, sample.groups = sample.groups, cancer.initiation.model='monoclonal', subclonal.test = 'bootstrap', subclonal.test.model = 'non-parametric', num.boots = 1000, founding.cluster = 1, cluster.center = 'mean', ignore.clusters = NULL, clone.colors = clone.colors, min.cluster.vaf = 0.00,

                    # min probability that CCF(clone) is non-negative

                    sum.p = 0.05,
                    # alpha level in confidence interval estimate for CCF(clone)
                    alpha = 0.05)

map driver events onto the trees

y <- transfer.events.to.consensus.trees(y, x[x$is.driver,], cluster.col.name = 'cluster', event.col.name = 'gene')

prepare branch-based trees

y <- convert.consensus.tree.clone.to.branch(y, branch.scale = 'sqrt')

plot variant clusters, bell plots, cell populations, and trees

plot.clonal.models(y,

               # box plot parameters

               box.plot = TRUE,
               fancy.boxplot = TRUE,
               fancy.variant.boxplot.highlight = 'is.driver',
               fancy.variant.boxplot.highlight.shape = 21,
               fancy.variant.boxplot.highlight.fill.color = 'red',
               fancy.variant.boxplot.highlight.color = 'black',
               fancy.variant.boxplot.highlight.note.col.name = 'gene',
               fancy.variant.boxplot.highlight.note.color = 'blue',
               fancy.variant.boxplot.highlight.note.size = 2,
               fancy.variant.boxplot.jitter.alpha = 1,
               fancy.variant.boxplot.jitter.center.color = 'grey50',
               fancy.variant.boxplot.base_size = 12,
               fancy.variant.boxplot.plot.margin = 1,
               fancy.variant.boxplot.vaf.suffix = '.VAF',
               # bell plot parameters
               clone.shape = 'bell',
               bell.event = TRUE,
               bell.event.label.color = 'blue',
               bell.event.label.angle = 60,
               clone.time.step.scale = 1,
               bell.curve.step = 2,
               # node-based consensus tree parameters
               merged.tree.plot = TRUE,
               tree.node.label.split.character = NULL,
               tree.node.shape = 'circle',
               tree.node.size = 30,
               tree.node.text.size = 0.5,
               merged.tree.node.size.scale = 1.25,
               merged.tree.node.text.size.scale = 2.5,
               merged.tree.cell.frac.ci = FALSE,
               # branch-based consensus tree parameters
               merged.tree.clone.as.branch = TRUE,
               mtcab.event.sep.char = ',',
               mtcab.branch.text.size = 1,
               mtcab.branch.width = 0.75,
               mtcab.node.size = 3,
               mtcab.node.label.size = 1,
               mtcab.node.text.size = 1.5,
               # cellular population parameters
               cell.plot = TRUE,
               num.cells = 100,
               cell.border.size = 0.25,
               cell.border.color = 'black',
               clone.grouping = 'horizontal',
               #meta-parameters
               scale.monoclonal.cell.frac = TRUE,
               show.score = FALSE,
               cell.frac.ci = TRUE,
               disable.cell.frac = FALSE,
               # output figure parameters
               out.dir = 'output',
               out.format = 'pdf',
               overwrite.output = TRUE,
               width = 8,
               height = 4,
               # vector of width scales for each panel from left to right
               panel.widths = c(3,4,2,4,2))

plot trees only

pdf('trees.pdf', width = 3, height = 5, useDingbats = FALSE) plot.all.trees.clone.as.branch(y, branch.width = 0.5, node.size = 1, node.label.size = 0.5) dev.off()

Inputfiles: sci_clonevol_140_0123_no123.txt

I look forward to your help!

hdng commented 5 years ago

min.cluster.vaf = 0.00 is the culprit. This parameter determines whether a cluster of variants is present in a sample. Clusters with mean (or median) VAF < min.cluster.vaf in a sample will be excluded from that sample before trees are inferred for it. It should be set to a value > 0, typically representing the sequencing error rate, e.g. 0.01.

Another issue is that in sample TD30, cluster 2 has a higher VAF than cluster 1, likely due to the low number of variants (see the image). Setting sum.p = 0.01 allows a more relaxed error tolerance, which yields one pruned consensus tree (see code).

image


# Plot the VAFs of the variant clusters for every sample column listed in
# vaf.col.names and write the figure to 'box.pdf' (5 x 8 inch page).
pdf('box.pdf', width = 5, height = 8, useDingbats = FALSE, title='')
# Only the jittered points are drawn: the violin and box layers are switched
# off below. Variants flagged in the 'is.driver' column are highlighted and
# annotated with the value of their 'gene' column.
# NOTE(review): the returned object is kept in `pp` but not used in this
# snippet.
pp <- plot.variant.clusters(x,
                            cluster.col.name = 'cluster',
                            show.cluster.size = FALSE,
                            cluster.size.text.color = 'blue',
                            vaf.col.names = vaf.col.names,
                            vaf.limits = 70,
                            sample.title.size = 20,
                            violin = FALSE,
                            box = FALSE,
                            jitter = TRUE,
                            jitter.shape = 1,
                            jitter.color = clone.colors,
                            jitter.size = 1,
                            jitter.alpha = 1,
                            jitter.center.method = 'median',
                            jitter.center.size = 1,
                            jitter.center.color = 'darkgray',
                            jitter.center.display.value = 'none',
                            highlight = 'is.driver',
                            highlight.shape = 21,
                            highlight.color = 'blue',
                            highlight.fill.color = 'green',
                            highlight.note.col.name = 'gene',
                            highlight.note.size = 2,
                            order.by.total.vaf = FALSE)
# Close the PDF device opened above so the file is finalised.
dev.off()

# Infer the consensus clonal evolution trees from the clustered variants in
# `x`, using a monoclonal initiation model with cluster 1 as the founding
# cluster and a non-parametric bootstrap (1000 resamples) for the subclonal
# test.
y = infer.clonal.models(variants = x,
                        cluster.col.name = 'cluster',
                        vaf.col.names = vaf.col.names,
                        sample.groups = sample.groups,
                        cancer.initiation.model='monoclonal',
                        subclonal.test = 'bootstrap',
                        subclonal.test.model = 'non-parametric',
                        num.boots = 1000,
                        founding.cluster = 1,
                        cluster.center = 'mean',
                        ignore.clusters = NULL,
                        clone.colors = clone.colors,
                        # presence threshold: a cluster whose mean (or median) VAF
                        # in a sample is below this is excluded from that sample
                        # before its trees are inferred. Must be > 0 (roughly the
                        # sequencing error rate); with 0.00 the run in this thread
                        # sat in tree enumeration for more than 24 hours.
                        min.cluster.vaf = 0.01,
                        # min probability that CCF(clone) is non-negative
                        # (0.01 is a more relaxed tolerance than the 0.05 used in
                        # the original run; needed here because cluster 2 has a
                        # higher VAF than cluster 1 in sample TD30)
                        sum.p = 0.01,
                        # alpha level in confidence interval estimate for CCF(clone)
                        alpha = 0.05)