Closed farmohh closed 2 years ago
getTagMatrix.binning.internal()
is not an exported method.
Maybe you can try getTagMatrix()
.
Thank you for your prompt reply.
Actually I tried this function before and got the following error:
tagMatrix <- getTagMatrix(peak, windows=cpgi) Error in if (type == "body") { : argument is of length zero
There is a workflow to plot the peak profile.
1. getTagMatrix() ---> matrix --->plotTagMatrix functions(e.g. plotPeakProf())
2. getPromoters()/getBioRegion()/makeBioRegionFromGranges() ---> Granges object(windows) ---> getTagMatrix() ---> matrix --->plotTagMatrix functions(e.g. plotPeakProf())
In order to simplify the input parameters of plotTagMatrix functions
, some information has been attached to the matrix
produced by getTagMatrix()
in the form of attributes
. The following example show the details.
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
peak <- getSampleFiles()[[4]]
mt <- getTagMatrix(peak = peak,
upstream = 1000,
downstream = 1000,
TxDb = txdb,
by = "gene",
type = "start_site")
> class(mt)
[1] "matrix" "array"
> attributes(mt)
$dim
[1] 566 2001
$dimnames
$dimnames[[1]]
[1] "44" "49" "152" "159" "216" "233" "243"
[8] "249" "369" "438" "444" "526" "605" "638"
[15] "651" "736" "816" "839" "853" "931" "941"
[22] "1032" "1090" "1091" "1153" "1155" "1163" "1165"
[29] "1233" "1246" "1351" "1481" "1551" "1568" "1576"
[36] "1581" "1596" "1619" "1644" "1669" "1821" "1905"
[43] "1968" "2012" "2185" "2227" "2238" "2246" "2326"
[50] "2342" "2358" "2375" "2446" "2537" "2590" "2660"
[57] "2696" "2718" "2739" "2743" "2774" "2814" "2890"
[64] "2993" "3024" "3146" "3209" "3284" "3297" "3319"
[71] "3346" "3351" "3370" "3401" "3408" "3427" "3441"
[78] "3495" "3580" "3623" "3626" "3657" "3720" "3729"
[85] "3771" "3788" "3817" "3824" "3841" "3856" "3981"
[92] "4032" "4059" "4087" "4108" "4141" "4274" "4276"
[99] "4278" "4319" "4323" "4346" "4367" "4381" "4397"
[106] "4466" "4467" "4538" "4601" "4618" "4624" "4667"
[113] "4671" "4675" "4686" "4693" "4720" "4771" "4795"
[120] "4798" "4822" "4840" "4857" "4883" "4892" "4899"
[127] "4904" "4911" "4956" "4960" "4995" "5003" "5008"
[134] "5052" "5062" "5096" "5110" "5131" "5138" "5145"
[141] "5149" "5283" "5318" "5346" "5354" "5379" "5387"
[148] "5515" "5520" "5628" "5665" "5701" "5767" "5813"
[155] "5856" "5858" "6016" "6044" "6097" "6108" "6135"
[162] "6424" "6461" "6483" "6496" "6501" "6569" "6653"
[169] "6668" "6683" "6687" "6734" "6740" "6796" "6850"
[176] "6919" "6950" "7053" "7055" "7097" "7112" "7117"
[183] "7138" "7152" "7168" "7234" "7238" "7326" "7377"
[190] "7391" "7400" "7473" "7534" "7537" "7554" "7617"
[197] "7625" "7700" "7744" "7823" "7938" "7999" "8010"
[204] "8041" "8046" "8123" "8175" "8233" "8308" "8437"
[211] "8470" "8483" "8485" "8511" "8551" "8586" "8625"
[218] "8662" "8722" "8732" "8743" "8879" "8948" "8981"
[225] "9005" "9067" "9087" "9165" "9193" "9214" "9224"
[232] "9231" "9254" "9328" "9350" "9353" "9363" "9369"
[239] "9388" "9406" "9508" "9528" "9598" "9629" "9643"
[246] "9720" "9725" "9732" "9751" "9764" "9769" "9773"
[253] "9800" "9894" "9903" "9919" "9951" "9979" "10067"
[260] "10088" "10113" "10131" "10225" "10264" "10331" "10341"
[267] "10365" "10374" "10401" "10408" "10422" "10441" "10442"
[274] "10459" "10463" "10509" "10522" "10554" "10616" "10630"
[281] "10652" "10654" "10680" "10771" "10868" "10896" "10917"
[288] "10953" "10968" "10978" "10997" "11014" "11026" "11050"
[295] "11162" "11215" "11295" "11318" "11373" "11387" "11453"
[302] "11541" "11558" "11597" "11845" "11863" "11929" "12039"
[309] "12054" "12060" "12078" "12123" "12218" "12233" "12295"
[316] "12325" "12419" "12443" "12451" "12465" "12521" "12565"
[323] "12568" "12582" "12585" "12613" "12614" "12619" "12649"
[330] "12663" "12665" "12675" "12688" "12761" "12763" "12859"
[337] "12862" "12872" "12935" "13032" "13101" "13108" "13110"
[344] "13131" "13190" "13237" "13380" "13408" "13433" "13510"
[351] "13572" "13602" "13639" "13729" "13786" "13800" "13811"
[358] "13842" "13910" "13921" "13926" "13944" "13951" "13954"
[365] "13957" "13995" "14041" "14057" "14155" "14516" "14546"
[372] "14563" "14612" "14633" "14638" "14660" "14662" "14675"
[379] "14678" "14690" "14732" "14738" "14808" "14873" "14890"
[386] "14924" "14943" "14977" "14985" "14994" "15072" "15108"
[393] "15148" "15157" "15184" "15213" "15217" "15218" "15221"
[400] "15235" "15285" "15314" "15321" "15364" "15543" "15602"
[407] "15666" "15763" "15784" "15788" "15797" "15846" "15856"
[414] "15882" "15894" "15965" "16023" "16053" "16084" "16109"
[421] "16169" "16203" "16206" "16246" "16265" "16284" "16291"
[428] "16295" "16304" "16411" "16455" "16459" "16489" "16495"
[435] "16545" "16562" "16564" "16620" "16664" "16679" "16766"
[442] "16857" "16860" "16883" "16889" "16924" "16930" "16959"
[449] "16970" "16972" "16981" "17064" "17083" "17097" "17125"
[456] "17130" "17198" "17245" "17290" "17312" "17339" "17357"
[463] "17375" "17422" "17513" "17558" "17565" "17661" "17669"
[470] "17732" "17793" "17933" "17975" "18054" "18057" "18116"
[477] "18153" "18165" "18282" "18313" "18392" "18452" "18472"
[484] "18572" "18605" "18609" "18649" "18659" "18721" "18738"
[491] "18759" "18764" "18816" "18945" "18953" "19089" "19114"
[498] "19128" "19139" "19244" "19274" "19277" "19320" "19357"
[505] "19376" "19416" "19637" "19638" "19710" "19755" "19766"
[512] "19798" "19907" "19957" "19982" "19993" "20010" "20043"
[519] "20058" "20142" "20178" "20217" "20326" "20367" "20375"
[526] "20390" "20423" "20650" "20731" "20784" "20787" "20797"
[533] "20801" "20813" "20870" "20901" "20940" "20983" "21019"
[540] "21076" "21090" "21104" "21123" "21195" "21279" "21298"
[547] "21372" "21391" "21400" "21473" "21529" "21566" "21571"
[554] "21581" "21594" "21603" "21627" "21640" "21725" "21763"
[561] "21789" "21806" "21811" "21868" "22015" "22042"
$dimnames[[2]]
NULL
$upstream
[1] 1000
$downstream
[1] 1000
$type
[1] "start_site"
$label
[1] "TSS"
$is.binning
[1] FALSE
Also, the granges object produced by getPromoters()/getBioRegion()/makeBioRegionFromGranges()
carry some information in the form of attributes
in order to simplify the input parameters of getTagMatrix()
.
pt <- getPromoters(TxDb = txdb,
upstream = 1000,
downstream = 1000)
> class(pt)
[1] "GRanges"
attr(,"package")
[1] "GenomicRanges"
> attributes(pt)
$seqnames
factor-Rle of length 22801 with 17606 runs
Lengths: 1 1 1 1 ... 1 1 1 1
Values : chr19 chr8 chr20 chr18 ... chr21 chr22 chr6 chr22
Levels(93): chr1 chr2 chr3 ... chrUn_gl000248 chrUn_gl000249
$strand
factor-Rle of length 22801 with 11257 runs
Lengths: 1 1 3 2 1 49 1 9 2 ... 1 1 1 2 1 1 1 1
Values : - + - + - + - + - ... + - + - + - + -
Levels(3): + - *
$ranges
IRanges object with 22801 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 58873214 58875214 2001
[2] 18247755 18249755 2001
[3] 43279376 43281376 2001
[4] 25756445 25758445 2001
[5] 244005886 244007886 2001
... ... ... ...
[22797] 115094944 115096944 2001
[22798] 35735323 35737323 2001
[22799] 19108967 19110967 2001
[22800] 90538619 90540619 2001
[22801] 50963905 50965905 2001
$seqinfo
Seqinfo object with 93 sequences from an unspecified genome; no seqlengths:
seqnames seqlengths isCircular genome
chr1 <NA> <NA> <NA>
chr2 <NA> <NA> <NA>
chr3 <NA> <NA> <NA>
chr4 <NA> <NA> <NA>
chr5 <NA> <NA> <NA>
... ... ... ...
chrUn_gl000245 <NA> <NA> <NA>
chrUn_gl000246 <NA> <NA> <NA>
chrUn_gl000247 <NA> <NA> <NA>
chrUn_gl000248 <NA> <NA> <NA>
chrUn_gl000249 <NA> <NA> <NA>
$elementMetadata
DataFrame with 22801 rows and 0 columns
$elementType
[1] "ANY"
$metadata
list()
$class
[1] "GRanges"
attr(,"package")
[1] "GenomicRanges"
$type
[1] "start_site"
$label
[1] "TSS"
$upstream
[1] 1000
$downstream
[1] 1000
In a word, the matrix
produced by getTagMatrix()
and the window
parameter in the getTagMatrix()
carry specific information to simplify the input parameters. So the window
parameter in the getTagMatrix()
should be created by getPromoters()/getBioRegion()/makeBioRegionFromGranges()
. Or omit the window
parameter and use getTagMatrix()
to call getPromoters()/getBioRegion()/makeBioRegionFromGranges()
and make the window
from a fresh start.
For the reasons above,
Actually I tried this function before and got the following error: tagMatrix <- getTagMatrix(peak, windows=cpgi) Error in if (type == "body") { : argument is of length zero
The error happened because the windows=cpgi
is a normal Granges object carrying no imformation. You should use the getPromoters()/getBioRegion()/makeBioRegionFromGranges()
functions to organized the cpgi
. Pull Request #190 supported users to investigate any region of interest through self-made granges object(cpgi
in your example).
Since this pr has not been merged into master, you can try the code in this way temporarily.
devtools::install_github("MingLi-929/ChIPseeker")
I have tried to get "cpgislands" instead of promoters in order to get distribution of MeDIP peaks over CpG islands, shore and shelf.
Since I can not get more details from your message, I just make some example code to meet your need.
CpG islands, I think is the body of CpG
# this work flow need to make windows first
CpG <- makeBioRegionFromGranges(gr = cpgi,
by = "CpG",
type = "body")
# investigate body region need binning method
# so the nbin parameter is needed
mt <- getTagMatrix(peak,
windows = CpG, nbin = 800)
# Or you can do all the things in one step
# use the getTagMatrix() to call makeBioRegionFromGranges()
mt1 <- getTagMatrix(peak,
type = "body",
by = "CpG",
gr=cpgi,
nbin = 800)
# Also you can look into the flank region of CpG islands
# rel(0.2) means each flank account for 20% length of the body regions
mt1 <- getTagMatrix(peak,
type = "body",
by = "CpG",
gr=cpgi,
nbin = 800,
upstream = rel(0.2),
downstream = rel(0.2))
In my point of view, CpG shore means the upstream and downstream of CpG islands, maybe I can take it 2kb. So the goal is gonna to look into the 2kb from the CpG start site and end site. In the example, you provided:
CpGislands:
seqnames ranges strand | name
<Rle> <IRanges> <Rle> | <character>
[1] chr1 28736-29810 * | CpG:_116
CpG shore:
start site: (28736-2000)-28736
end site: 29810-(29810+2000)
If my do not misunderstand, the code can be like this
CpG_shore_start_site <- makeBioRegionFromGranges(gr = cpgi,
by = "CpG",
type = "start_site",
upstream = 2000,
downstream = 0)
CpG_shore_end_site <- makeBioRegionFromGranges(gr = cpgi,
by = "CpG",
type = "end_site",
upstream = 0,
downstream = 2000)
mt <- getTagMatrix(peak,
windows = CpG_shore_start_site)
mt1 <- getTagMatrix(peak,
windows = CpG_shore_end_site )
Or if you wanna investigate the CpG_shore in the center of CpG_start_site
CpG_shore_start_site <- makeBioRegionFromGranges(gr = cpgi,
by = "CpG",
type = "start_site",
upstream = 2000,
downstream = 2000)
Thank you ever so much. It solved the issue. But I just wanted to check something with you:
Using nbin=800, I got an error as follow: 3179 peaks(77.78322%), having lengths smaller than 800bp, are filtered... 2022-05-16 4:40:02 PM Error in tagMatrixList[[i]][[j]] : subscript out of bounds
So I decreased the number of bins to 100 (nbin=100) 0 peaks(0%), having lengths smaller than 100bp, are filtered... 2022-05-16 4:43:31 PM
I get the plot and it looks reasonable.
nbin
parameter stand for the amount of bin used in the binning methond.
a bin
in binning methond is supposed to hold a number of DNA base.
for example, a region has a length of 1000bp, and use nbin=10
to bin it. The number of bases in a bin is 1000bp/10 =100bp.
If the nbin
too big, for example, a region with a length of 1000bp and nbin=1200
, and the number of bases in this bin is 1000bp/1200 = 1/12 < 1. In this case, the region will be omitted.
Go back to the question here,
Using nbin=800, I got an error as follow: 3179 peaks(77.78322%), having lengths smaller than 800bp, are filtered... 2022-05-16 4:40:02 PM Error in tagMatrixList[[i]][[j]] : subscript out of bounds
If the nbin=800
, 3179 peaks(77.78322%) are filtered, and there leave little peak. And the function go wrongs, for subscript out of bounds
.
So,
So I decreased the number of bins to 100 (nbin=100) 0 peaks(0%), having lengths smaller than 100bp, are filtered... 2022-05-16 4:43:31 PM
In this way, no peak are filtered, and everything go properly. The plot and the data are credible.
Dear Developer,
Thank you for the great package.
Actually, I have run into the same problem with using getTagMatrix to process customized Granges (body type). A demo is shown as follows,
peak <- readPeakFile(files[[4]])
test.range <- GRanges(seqnames = c('chr1','chrX','chr1'),ranges = IRanges(c(815094,139583955,3816546),
width=200,gene_id=c('1','100','1000')))
attr(test.range,'type') <- 'body'
attr(test.range,'label') <- c('TSS','TTS')
attr(test.range,'upstream') <- NULL
attr(test.range,'downstream') <- NULL
test.matrix <- getTagMatrix(peak,windows = test.range,nbin = 20,weightCol = 'V5')
test.matrix
the result is as follows,
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
[1,] 295.76 295.76 295.76 295.76 295.76 295.76 295.76 295.76 295.76 295.76 57.57 57.57 57.57 57.57 57.57 57.57 57.57 57.57 57.57 60.600
[2,] 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 918.73 1020.811
attr(,"type")
[1] "body"
attr(,"label")
[1] "TSS" "TTS"
attr(,"is.binning")
[1] TRUE
Since, I have created a GRanges object with 3 different elements (ranges), it's expected that we will get a 3 x 20 matrix (nbin = 20), not a 2 x 20 matrix.
I have checked the source code, and found the problem is may caused by line #691 in tagMatrix.R,
tagMatrixList <- lapply(peakView, function(x) viewApply(x, as.vector))
Once I changed it into,
tagMatrixList <- lapply(peakView, function(x) lapply(x, as.vector))
The result becomes a 3 x 20 matrix what we expected. The viewApply function seems to generate a matrix for ranges in each chromosome.
Another problem is that the Granges I created all have a flat coverage or score (weightCol = 'V5'), so the values in each row of the matrix should be equal, however, the last element in each row always is slightly larger than others. This should be caused by the line #956 in tagMatrix.R,
tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][[j]])-cursor)
Once I changed it into,
tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][[j]])-cursor+1)
The binning score become correct. Following the coding logic, the element indicated by 'cursor' should be a member of the last bin.
Hopes this info could help you to update your package.
> sessionInfo()
R version 4.2.0 (2022-04-22)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
Random number generation:
RNG: Mersenne-Twister
Normal: Inversion
Sample: Rounding
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=zh_CN.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=zh_CN.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=zh_CN.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 GenomicFeatures_1.48.3 AnnotationDbi_1.58.0
[4] Biobase_2.56.0 GenomicRanges_1.48.0 GenomeInfoDb_1.32.2
[7] IRanges_2.30.0 S4Vectors_0.34.0 BiocGenerics_0.42.0
[10] ChIPseeker_1.32.0
Thank u for your advice!! Your two questions help us a lot.
There is a small different for the first question but your advice help us any way
Since, I have created a GRanges object with 3 different elements (ranges), it's expected that we will get a 3 x 20 matrix (nbin = 20), not a 2 x 20 matrix.
the bug comes from https://github.com/YuLab-SMU/ChIPseeker/blob/489e2834a4a6e3db3d66731760be6abd8c9e5a89/R/tagMatrix.R#L759-L763 rather than https://github.com/YuLab-SMU/ChIPseeker/blob/489e2834a4a6e3db3d66731760be6abd8c9e5a89/R/tagMatrix.R#L691
Dear Yu, Thank you for the great package.
I have tried to get "cpgislands" instead of promoters in order to get distribution of MeDIP peaks over CpG islands, shore and shelf.
CpGislands:
peak: seqnames ranges strand | V4 V5 V6 V7