#minimal set to reproduce integer overflow:
#We generate randomly distributed GRanges across the mouse genome.
#Take coverage and subset the coverage by the genome itself
library(GenomicRanges)
library(BSgenome)
# Load the sequence information of the mm10 mouse genome and keep only the
# standard chromosomes.
Mouse10BSgenome <- GenomeInfoDb::seqinfo(getBSgenome("BSgenome.Mmusculus.UCSC.mm10"))
Mouse10BSgenome <- keepStandardChromosomes(Mouse10BSgenome)
# Remove every sequence that is flagged as circular.
Mouse10BSgenome <- GenomeInfoDb::dropSeqlevels(
  x = Mouse10BSgenome,
  value = names(which(isCircular(Mouse10BSgenome)))
)
# Convert the sequence information to a GRanges object.
Mouse10GR <- GRanges(Mouse10BSgenome)
# Number of entries in Mouse10GR; used below as the chromosome count.
numberOfChromosomes <- length(seqnames(Mouse10GR))
# Generate small genomic ranges randomly distributed across the mouse genome.
# Fix the RNG seed so that every run of this reproducer draws the same
# per-chromosome counts and the same range positions.
set.seed(1)
numberOfSmallRanges <- 200
lengthOfSmallRanges <- 200
# Number of small ranges on each chromosome: a single multinomial draw of
# numberOfSmallRanges items with equal probability for every chromosome.
rangesPerChr <- as.list(rmultinom(
  n = 1,
  size = numberOfSmallRanges,
  prob = rep(1 / numberOfChromosomes, numberOfChromosomes)
))
names(rangesPerChr) <- as.vector(seqnames(Mouse10GR))
# Populate the chromosomes with small GRanges one chromosome at a time:
# draw the start positions for each chromosome in order, then fold the
# per-chromosome GRanges onto an initially empty GRanges from left to right.
smallGR <- Reduce(
  c,
  lapply(seq_len(numberOfChromosomes), function(chrIndex) {
    nameOfChr <- names(Mouse10BSgenome)[chrIndex]
    lengthOfChr <- seqlengths(Mouse10BSgenome)[chrIndex]
    countOnChr <- rangesPerChr[[chrIndex]]
    # Starts are sampled without replacement from 1..lengthOfChr; every range
    # has the fixed width lengthOfSmallRanges.
    startsOnChr <- sample(seq_len(lengthOfChr), countOnChr)
    GRanges(
      seqnames = rep(nameOfChr, countOnChr),
      ranges = IRanges(start = startsOnChr, width = lengthOfSmallRanges)
    )
  }),
  GRanges()
)
# Compute the coverage of the randomly placed ranges.
mycov <- coverage(smallGR)
# Append zeros to each coverage vector so that it reaches the chromosome
# length plus a small safety margin.
for (chrname in names(mycov)) {
  # Look the chromosome length up by NAME rather than by position: the
  # elements of mycov are not guaranteed to line up one-to-one with the
  # entries of Mouse10BSgenome (e.g. if a chromosome received no ranges).
  # `[[` raises an error if the name is unknown instead of silently using
  # the wrong length.
  targetLen <- seqlengths(Mouse10BSgenome)[[chrname]]
  currentLen <- length(mycov[[chrname]])
  # Add an extra 2 * lengthOfSmallRanges just to be sure.
  deltaLen <- targetLen - currentLen + 2 * lengthOfSmallRanges
  # Append the zeros as a single-run Rle instead of rep(0, deltaLen), which
  # would first materialise a dense vector of chromosome length. The pad value
  # stays the numeric 0 used originally.
  mycov[[chrname]] <- c(mycov[[chrname]], Rle(0, deltaLen))
}
# Case A: subset the coverage by the whole genome in a single call.
# Reported to fail with an integer overflow error.
mycov[Mouse10GR]
# Case B: subset the coverage in two halves and combine the results.
# Reported to fail as well.
# Split the genome ranges into 2 subsets (entries 1-13 and 14-21).
Mouse10GRfirst<-Mouse10GR[1:13]
Mouse10GRsecond<-Mouse10GR[14:21]
covFirst<-mycov[Mouse10GRfirst]
covSecond<-mycov[Mouse10GRsecond]
c(covFirst, covSecond) # reported to throw the error
# Case C (workaround): the same two halves, combined with the coverage of an
# empty GRanges as the first argument. Reported to produce an RleList.
c(coverage(GRanges()),covFirst, covSecond)
I'm okay with using the workaround (Case C), but I need to understand why it works and why Cases A and B do not. Otherwise, it would be wrong to rely on the workaround when preparing a manuscript.
sessionInfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Monterey 12.6.7
Problem: Subsetting an RleList by a GRanges object results in an integer overflow error, although a workaround exists.
Expected behavior: The code produces an RleList in Cases A, B, and C.
Observed behavior: The code throws an error in Cases A and B, and produces an RleList only in Case C.
Produced error:
Required libraries: GenomicRanges, BSgenome, BSgenome.Mmusculus.UCSC.mm10
The test case code:
I'm okay with using the workaround (Case C), but I need to understand why it works and why Cases A and B do not. Otherwise, it would be wrong to rely on the workaround when preparing a manuscript.