YuLab-SMU / ChIPseeker

:dart: ChIP peak Annotation, Comparison and Visualization
https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585
219 stars 74 forks source link

bug fix #193 #205

Closed MingLi-929 closed 1 year ago

MingLi-929 commented 1 year ago

Add a `columns` argument to `annotatePeak()` to fix #193. The `columns` argument lets users choose which specific columns to retrieve from the annotation database. As reported in #193, users cannot get ENSEMBL IDs even when they pass an Ensembl database as `annoDb`:

library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(EnsDb.Hsapiens.v86)

files <- getSampleFiles()
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

Anno <-  annotatePeak(files[[4]], tssRegion=c(-3000, 3000), 
                     TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene, 
                     annoDb="EnsDb.Hsapiens.v86")

> Anno@anno
GRanges object with 1331 ranges and 13 metadata columns:
         seqnames              ranges strand |             V4        V5
            <Rle>           <IRanges>  <Rle> |    <character> <numeric>
     [1]     chr1       815093-817883      * |    MACS_peak_1    295.76
     [2]     chr1     1243288-1244338      * |    MACS_peak_2     63.19
     [3]     chr1     2979977-2981228      * |    MACS_peak_3    100.16
     [4]     chr1     3566182-3567876      * |    MACS_peak_4    558.89
     [5]     chr1     3816546-3818111      * |    MACS_peak_5     57.57
     ...      ...                 ...    ... .            ...       ...
  [1327]     chrX 135244783-135245821      * | MACS_peak_1327     55.54
  [1328]     chrX 139171964-139173506      * | MACS_peak_1328    270.19
  [1329]     chrX 139583954-139586126      * | MACS_peak_1329    918.73
  [1330]     chrX 139592002-139593238      * | MACS_peak_1330    210.88
  [1331]     chrY   13845134-13845777      * | MACS_peak_1331     58.39
                     annotation   geneChr geneStart   geneEnd geneLength
                    <character> <integer> <integer> <integer>  <integer>
     [1]       Promoter (2-3kb)         1    803451    812182       8732
     [2]       Promoter (<=1kb)         1   1243994   1247057       3064
     [3]       Promoter (<=1kb)         1   2976181   2980350       4170
     [4]       Promoter (<=1kb)         1   3547331   3566671      19341
     [5]       Promoter (<=1kb)         1   3816968   3832011      15044
     ...                    ...       ...       ...       ...        ...
  [1327] Intron (uc010nrz.2/2..        23 135251455 135293518      42064
  [1328]       Promoter (<=1kb)        23 139173826 139175070       1245
  [1329]       Promoter (1-2kb)        23 139585152 139587225       2074
  [1330]      Distal Intergenic        23 139585152 139587225       2074
  [1331]      Distal Intergenic        24  14517915  14533389      15475
         geneStrand      geneId transcriptId distanceToTSS      SYMBOL    GENENAME
          <integer> <character>  <character>     <numeric> <character> <character>
     [1]          2      284593   uc001abt.4         -2911      FAM41C      FAM41C
     [2]          1      126789   uc001aed.3             0       PUSL1       PUSL1
     [3]          2      440556   uc001aka.3             0   LINC00982   LINC00982
     [4]          2       49856   uc001ako.3             0      WRAP73      WRAP73
     [5]          1   100133612   uc001alg.3             0   LINC01134   LINC01134
     ...        ...         ...          ...           ...         ...         ...
  [1327]          1        2273   uc004ezn.2         -5634        FHL1        FHL1
  [1328]          1      389895   uc031tkm.1          -320        <NA>        <NA>
  [1329]          2        6658   uc004fbd.1          1099        SOX3        SOX3
  [1330]          2        6658   uc004fbd.1         -4777        SOX3        SOX3
  [1331]          2      352887   uc022cji.1        687612        <NA>        <NA>
  -------
  seqinfo: 24 sequences from hg19 genome

After adding the `columns` argument, users can request specific columns from the database:

Anno1 <- annotatePeak(files[[4]], tssRegion=c(-3000, 3000), 
                      TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene, 
                      annoDb="EnsDb.Hsapiens.v86",
                      columns=c("ENTREZID", "GENEID", "SYMBOL", "GENENAME"))

> Anno1@anno
GRanges object with 1331 ranges and 14 metadata columns:
         seqnames              ranges strand |             V4        V5
            <Rle>           <IRanges>  <Rle> |    <character> <numeric>
     [1]     chr1       815093-817883      * |    MACS_peak_1    295.76
     [2]     chr1     1243288-1244338      * |    MACS_peak_2     63.19
     [3]     chr1     2979977-2981228      * |    MACS_peak_3    100.16
     [4]     chr1     3566182-3567876      * |    MACS_peak_4    558.89
     [5]     chr1     3816546-3818111      * |    MACS_peak_5     57.57
     ...      ...                 ...    ... .            ...       ...
  [1327]     chrX 135244783-135245821      * | MACS_peak_1327     55.54
  [1328]     chrX 139171964-139173506      * | MACS_peak_1328    270.19
  [1329]     chrX 139583954-139586126      * | MACS_peak_1329    918.73
  [1330]     chrX 139592002-139593238      * | MACS_peak_1330    210.88
  [1331]     chrY   13845134-13845777      * | MACS_peak_1331     58.39
                     annotation   geneChr geneStart   geneEnd geneLength
                    <character> <integer> <integer> <integer>  <integer>
     [1]       Promoter (2-3kb)         1    803451    812182       8732
     [2]       Promoter (<=1kb)         1   1243994   1247057       3064
     [3]       Promoter (<=1kb)         1   2976181   2980350       4170
     [4]       Promoter (<=1kb)         1   3547331   3566671      19341
     [5]       Promoter (<=1kb)         1   3816968   3832011      15044
     ...                    ...       ...       ...       ...        ...
  [1327] Intron (uc010nrz.2/2..        23 135251455 135293518      42064
  [1328]       Promoter (<=1kb)        23 139173826 139175070       1245
  [1329]       Promoter (1-2kb)        23 139585152 139587225       2074
  [1330]      Distal Intergenic        23 139585152 139587225       2074
  [1331]      Distal Intergenic        24  14517915  14533389      15475
         geneStrand      geneId transcriptId distanceToTSS          GENEID
          <integer> <character>  <character>     <numeric>     <character>
     [1]          2      284593   uc001abt.4         -2911 ENSG00000230368
     [2]          1      126789   uc001aed.3             0 ENSG00000169972
     [3]          2      440556   uc001aka.3             0 ENSG00000177133
     [4]          2       49856   uc001ako.3             0 ENSG00000116213
     [5]          1   100133612   uc001alg.3             0 ENSG00000236423
     ...        ...         ...          ...           ...             ...
  [1327]          1        2273   uc004ezn.2         -5634 ENSG00000022267
  [1328]          1      389895   uc031tkm.1          -320            <NA>
  [1329]          2        6658   uc004fbd.1          1099 ENSG00000134595
  [1330]          2        6658   uc004fbd.1         -4777 ENSG00000134595
  [1331]          2      352887   uc022cji.1        687612            <NA>
              SYMBOL    GENENAME
         <character> <character>
     [1]      FAM41C      FAM41C
     [2]       PUSL1       PUSL1
     [3]   LINC00982   LINC00982
     [4]      WRAP73      WRAP73
     [5]   LINC01134   LINC01134
     ...         ...         ...
  [1327]        FHL1        FHL1
  [1328]        <NA>        <NA>
  [1329]        SOX3        SOX3
  [1330]        SOX3        SOX3
  [1331]        <NA>        <NA>
  -------
  seqinfo: 24 sequences from hg19 genome

Here we can see the ENSEMBL IDs in the `GENEID` column.