Closed ixxmu closed 2 years ago
# If nothing goes wrong, these two checks are all it takes to install pathview.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("pathview", quietly = TRUE)) {
  BiocManager::install("pathview")
}
# If the automatic install does not work, install the pieces by hand.
# Make sure BiocManager is available first,
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# then install these four packages through BiocManager,
BiocManager::install(
  c("Rgraphviz", "png", "KEGGgraph", "org.Hs.eg.db")
)
# and finally request pathview itself from the R-Forge repository.
install.packages("pathview", repos = "http://R-Forge.R-project.org")
# The original note says to download these two archives beforehand (it names
# CRAN as the source) and install them from local files; replace the
# placeholder directory with wherever you saved them.
install.packages(
  "/your/local/directory/pathview_1.0.0.tar.gz",
  repos = NULL,
  type = "source"
)
install.packages(
  "/your/local/directory/XML_3.95-0.2.zip",
  repos = NULL
)
# The heart of the package is, of course, the pathview() function.
library(pathview)
# Load the gse16873.d demo dataset and preview its first rows.
data(gse16873.d)
head(gse16873.d)
DCIS_1 DCIS_2 DCIS_3 DCIS_4 DCIS_5 DCIS_6
10000 -0.30764480 -0.14722769 -0.023784808 -0.07056193 -0.001323087 -0.15026813
10001 0.41586805 -0.33477259 -0.513136907 -0.16653712 0.111122223 0.13400734
10002 0.19854925 0.03789588 0.341865341 -0.08527420 0.767559264 0.15828609
10003 -0.23155297 -0.09659311 -0.104727283 -0.04801404 -0.208056443 0.03344448
100048912 -0.04490724 -0.05203146 0.036390376 0.04807823 0.027205816 0.05444739
10004 -0.08756237 -0.05027725 0.001821133 0.03023835 0.008034394 -0.06860749
# Render pathway "04110" for species "hsa", feeding in the first column of
# gse16873.d as gene data; "gse16873" is the suffix for the output file.
pv.out <- pathview(
  gene.data = gse16873.d[, 1],
  pathway.id = "04110",
  species = "hsa",
  out.suffix = "gse16873"
)
gene.data——单样本或多样本的数据,行名是KEGG的基因ID(或者转录本ID、蛋白质ID、化合物ID),列名是样本名。
pathway.id——选择你想展示的通路编号,你可以用下面的方式来查看可选择的通路编号
# Load the paths.hsa dataset and show its first entries, to browse the
# pathway IDs that can be chosen.
data(paths.hsa)
head(paths.hsa)
hsa00010 hsa00020
"Glycolysis / Gluconeogenesis" "Citrate cycle (TCA cycle)"
hsa00030 hsa00040
"Pentose phosphate pathway" "Pentose and glucuronate interconversions"
hsa00051 hsa00052
"Fructose and mannose metabolism" "Galactose metabolism"
# Attach the package, then open its documentation: the package-level index
# first, followed by the help topic named pathview (help() and ? are two
# spellings of the same lookup).
library(pathview)
library(help=pathview)
help(pathview)
?pathview
#参数
gene.data
#解释过了
cpd.data
#跟上一个参数类似,不过输入的是KEGG compound IDs(CHEMBL database收录)
#注意gene.data与cpd.data 作为输入数据,不能同时为空
pathway.id
#解释过了
species
#这个不用多解释了,科学命名法(例如"Homo sapiens")与常用名(例如"human")
#都是支持的,但是为了防止不必要的麻烦,最好还是用kegg.code
#如果你想选择特定的物种,可以在这里找:
# Load the korg species table and preview it to look up a species' kegg.code.
data(korg)
head(korg)
ktax.id tax.id kegg.code scientific.name common.name
[1,] "T01001" "9606" "hsa" "Homo sapiens" "human"
[2,] "T01005" "9598" "ptr" "Pan troglodytes" "chimpanzee"
[3,] "T02283" "9597" "pps" "Pan paniscus" "bonobo"
[4,] "T02442" "9595" "ggo" "Gorilla gorilla gorilla" "western lowland gorilla"
[5,] "T01416" "9601" "pon" "Pongo abelii" "Sumatran orangutan"
[6,] "T03265" "61853" "nle" "Nomascus leucogenys" "northern white-cheeked gibbon"
entrez.gnodes kegg.geneid ncbi.geneid ncbi.proteinid uniprot
[1,] "1" "374659" "374659" "NP_001273380" "Q8N4P3"
[2,] "1" "474020" "474020" "XP_001140087" "Q1XHV8"
[3,] "1" "100989900" "100989900" "XP_003811308" NA
[4,] "1" "101125212" "101125212" "XP_018886437" "G3QNH0"
[5,] "1" "100172878" "100172878" "NP_001125944" "Q5R9G0"
[6,] "1" "105739221" "105739221" "XP_012359712" "G1RK33"
kegg.dir
#KEGG通路数据文件(.xml)和图片文件(.png)所在的目录,默认为当前工作目录(".")
cpd.idtype
#告诉函数cpd.data.中用的化合物编号来源
gene.idtype
#告诉函数gene.data的ID来源,可以用以下方式查看
# Load the bods table and preview it; its columns list the annotation
# package, species, KEGG code and ID type.
data(bods)
head(bods)
package species kegg code id.type
[1,] "org.Ag.eg.db" "Anopheles" "aga" "eg"
[2,] "org.At.tair.db" "Arabidopsis" "ath" "tair"
[3,] "org.Bt.eg.db" "Bovine" "bta" "eg"
[4,] "org.Ce.eg.db" "Worm" "cel" "eg"
[5,] "org.Cf.eg.db" "Canine" "cfa" "eg"
[6,] "org.Dm.eg.db" "Fly" "dme" "eg"
gene.annotpkg
#可以调用其他包完输入基因名的转换(symbols and Entrez, gene ID的互相转换)
#这里不明白的可以看一下
答读者问(五)如何实现各物种基因的ID/symbol的转换
min.nnodes
#包含最小nodes("gene","enzyme", "compound" or "ortholog")的通路阈值
#默认为3
kegg.native
#用KEGG的原画(T,位图 PNG)
#还是不用(F,矢量图 PDF)
map.null
#空的gene.data或cpd.data在nodes中的显示方式,选TRUE就会给未映射(unmapped)的node显示NA的颜色
#选FALSE的时候只会以未映射(unmapped)node的形式显示
expand.node
#只有在kegg.native=FALSE时才会生效。默认一个点是会包含一类相似的基因或代谢物等
#expand.node=T,可以将他们拆开单独展示
split.group
#按group拆分nodes,同样也仅在kegg.native=FALSE时生效
map.symbol
#gene的node是否以symbol的格式显示,在以下两种情况下生效:
#kegg.native=FALSE 且 same.layer=FALSE
#kegg.native=TRUE 且 same.layer=TRUE
map.cpdname
#化合物的node是否以化合物的名称标注
#仅在kegg.native=FALSE时生效
node.sum
#多基因node展示数值的计算方式
#选项包括"sum","mean", "median", "max", "max.abs" and "random".
#默认值为 node.sum="sum".
discrete
#告诉函数gene.data或cpd.data是否是离散型数值
#这个参数需要以list的形式输入:
discrete=list(gene=FALSE, cpd=FALSE),
limit
#这个参数指定gene.data和cpd.data映射为颜色时的极值范围:每个元素长度为1时给出上限(双向数据则为绝对值上限),
#长度为2时分别给出下限和上限。这个操作主要是为了归一化颜色,画过热图的同学会知道
#同样的,这个参数需要以list的形式输入:
limit=list(gene=1, cpd=1).
bins
#针对上一个参数,bins告诉函数在pseudo color中需要分为几个等级
bins=list(gene=10, cpd=10).
low, mid, high
#指定pseudo color的颜色;当数据是1 directional时,只有mid和high会生效
#默认值是"green"-"gray"-"red"(用于gene.data) 和"blue"-"gray"-"yellow" (用于cpd.data)
#用颜色名或RGB代码均可
na.col
#NA node的颜色
# Load the built-in demo dataset.
data(gse16873.d)
# If you have prepared your own expression file, read it in as shown next;
# here the demo file that ships inside the pathview package stands in for it.
filename <- system.file("extdata/gse16873.demo", package = "pathview")
filename
[1] "C:/Users/53513/Documents/R/win-library/4.0/pathview/extdata/gse16873.demo"
# Read the tab-delimited file, taking its first column as the row names.
gse16873 <- read.delim(filename, row.names = 1)
# Subtract every odd-numbered column (1, 3, ..., 11) from the even-numbered
# column right after it (2, 4, ..., 12), giving six difference columns.
gse16873.d <- gse16873[, seq(2, 12, by = 2)] - gse16873[, seq(1, 11, by = 2)]
# Peek at the top-left corner of the table that was read in.
gse16873[1:5, 1:5]
HN_1 DCIS_1 HN_2 DCIS_2 HN_3
10000 6.765984 6.458339 6.921720 6.774493 7.010564
10001 6.339474 6.755342 7.177369 6.842597 7.392611
10002 6.591755 6.790304 6.735359 6.773255 6.700016
10003 6.822092 6.590539 6.508452 6.411859 6.575640
100048912 7.356051 7.311144 7.385513 7.333481 7.392233
# Load the built-in demo pathway data.
data(demo.paths)
head(demo.paths)
# It is really a list that can hold pathway IDs and related information.
data(paths.hsa)
head(paths.hsa)
# A hard-coded i <- 1 is a bit clumsy; in real use, wrap the call in a loop
# over the selected pathways instead.
i <- 1
# kegg.native is spelled TRUE rather than T: T is an ordinary variable that
# can be reassigned, whereas TRUE is a reserved constant.
pv.out <- pathview(
  gene.data = gse16873.d[, 1],
  pathway.id = demo.paths$sel.paths[i],
  species = "hsa",
  out.suffix = "gse16873",
  kegg.native = TRUE
)
#这里的gene.data就是基因对应的表达量/FoldChange,pathway.id就是选择需要展示的通路,species不用多说,out.suffix是输出图片的后缀
# After the run, list the files whose names match "hsa04110".
list.files(pattern = "hsa04110", full.names = TRUE)
# hsa04110.png is the untouched original pathway image; hsa04110.gse16873.png
# (pathway name plus out.suffix) is the rendering with your data mapped on.
# As mentioned, the return value is a list; inspect its structure like this:
str(pv.out)
# Check how the genes you supplied were mapped.
head(pv.out$plot.data.gene)
### same.layer
# With same.layer = FALSE, the KEGG gene labels or EC numbers on the original
# image are replaced by official gene symbols. Try it:
pv.out <- pathview(
  gene.data = gse16873.d[, 1],
  pathway.id = demo.paths$sel.paths[i],
  species = "hsa",
  out.suffix = "gse16873.2layer",
  kegg.native = TRUE,
  same.layer = FALSE
)
# The same call with same.layer = TRUE, for comparison. It gets its own
# out.suffix: reusing "gse16873.2layer" would overwrite the image written by
# the call above, leaving nothing to compare against.
pv.out <- pathview(
  gene.data = gse16873.d[, 1],
  pathway.id = demo.paths$sel.paths[i],
  species = "hsa",
  out.suffix = "gse16873.1layer",
  kegg.native = TRUE,
  same.layer = TRUE
)
# With kegg.native = FALSE you get a vector PDF instead of the bitmap PNG.
# (FALSE is spelled out because F is an ordinary, reassignable variable.)
# The resulting figure is not shown here.
pv.out <- pathview(
  gene.data = gse16873.d[, 1],
  pathway.id = demo.paths$sel.paths[i],
  species = "hsa",
  out.suffix = "gse16873",
  kegg.native = FALSE,
  sign.pos = demo.paths$spos[i]
)
# sign.pos controls where the pathview signature is placed; the accepted
# values are "bottomleft", "bottomright", "topright" and "topleft".
pv.out <- pathview(
  gene.data = gse16873.d[, 1],
  pathway.id = demo.paths$sel.paths[i],
  species = "hsa",
  out.suffix = "gse16873",
  kegg.native = FALSE,
  sign.pos = "bottomleft"
)
# With kegg.native = FALSE, choosing separate layers (same.layer = FALSE)
# produces a two-page PDF: one page with the pathway graph, one with the
# legend/annotation.
pv.out <- pathview(
  gene.data = gse16873.d[, 1],
  pathway.id = demo.paths$sel.paths[i],
  species = "hsa",
  out.suffix = "gse16873.2layer",
  kegg.native = FALSE,
  sign.pos = demo.paths$spos[i],
  same.layer = FALSE
)
两页:
其他的参数就列在下面大家自己去学习、尝试吧:
keggview.native() 与 keggview.graph()函数的额外参数
plot.data.gene
data.frame returned by node.map function for rendering mapped gene nodes, including node name, type, positions (x, y), sizes (width, height), and mapped gene.data. This data is also used as input for pseudo-color coding through node.color function. Default plot.data.gene=NULL.
plot.data.cpd
same as plot.data.gene, except for mapped compound node data. Default plot.data.cpd=NULL. Note that plot.data.gene and plot.data.cpd can't be NULL simultaneously.
cols.ts.gene
vector or matrix of colors returned by node.color function for rendering gene.data. Dimensionality is the same as the latter. Default cols.ts.gene=NULL.
cols.ts.cpd
same as cols.ts.gene, except corresponding to cpd.data. Default cols.ts.cpd=NULL. Note that cols.ts.gene and cols.ts.cpd can't be NULL simultaneously.
node.data
list returned by node.info function, which parse KGML file directly or indirectly, and extract the node data.
pathway.name
character, the full KEGG pathway name in the format of 3-letter species code with 5-digit pathway id, eg "hsa04612".
out.suffix
character, the suffix to be added after the pathway name as part of the output graph file. Sample names or column names of the gene.data or cpd.data are also added when there are multiple samples. Default out.suffix="pathview".
multi.state
logical, whether multiple states (samples or columns) of gene.data or cpd.data should be integrated and plotted in the same graph. Default multi.state=TRUE. In other words, gene or compound nodes will be sliced into multiple pieces corresponding to the number of states in the data.
match.data
logical, whether the samples of gene.data and cpd.data are paired. Default match.data=TRUE. Let the sample sizes of gene.data and cpd.data be m and n: when m>n, extra columns of NA's (mapped to no color) will be added to cpd.data to make the sample sizes the same. This will result in the same number of slices in gene nodes and compound nodes when multi.state=TRUE.
same.layer
logical, control plotting layers: 1) if node colors be plotted in the same layer as the pathway graph when kegg.native=TRUE, 2) if edge/node type legend be plotted in the same page when kegg.native=FALSE.
res
The nominal resolution in ppi which will be recorded in the bitmap file, if a positive integer. Also used for 'units' other than the default, and to convert points to pixels. This argument is only effective when kegg.native=TRUE. Default res=300.
cex
A numerical value giving the amount by which plotting text and symbols should be scaled relative to the default 1. Default cex=0.25 when kegg.native=TRUE, cex=0.5 when kegg.native=FALSE.
new.signature
logical, whether pathview signature is added to the pathway graphs. Default new.signature=TRUE.
plot.col.key
logical, whether color key is added to the pathway graphs. Default plot.col.key= TRUE.
key.align
character, controlling how the color keys are aligned when both gene.data and cpd.data are not NULL. Potential values are "x", aligned by x coordinates, and "y", aligned by y coordinates. Default key.align="x".
key.pos
character, controlling the position of color key(s). Potential values are "bottomleft", "bottomright", "topleft" and "topright". Default key.pos="topright".
sign.pos
character, controlling the position of pathview signature. Only effective when kegg.native=FALSE; the signature position is fixed in place of the original KEGG signature when kegg.native=TRUE. Potential values are "bottomleft", "bottomright", "topleft" and "topright". Default sign.pos="bottomright".
path.graph
a graph object parsed from KGML file, only effective when kegg.native=FALSE.
pdf.size
a numeric vector of length 2, giving the width and height of the pathway graph pdf file. Note that pdf width increase by half when same.layer=TRUE to accommodate legends. Only effective when kegg.native=FALSE. Default pdf.size=c(7,7).
rankdir
character, either "LR" (left to right) or "TB" (top to bottom), specifying the pathway graph layout direction. Only effective when kegg.native=FALSE. Default rankdir="LR".
is.signal
logical, if the pathway is treated as a signaling pathway, where all the unconnected nodes are dropped. This argument also affect the graph layout type, i.e. "dot" for signals or "neato" otherwise. Only effective when kegg.native=FALSE. Default is.signal=TRUE.
afactor
numeric, node amplifying factor. This argument is for node size fine-tuning, its effect is subtler than expected. Only effective when kegg.native=FALSE. Default afactor=1.
text.width
numeric, specifying the line width for text wrap. Only effective when kegg.native= FALSE. Default text.width=15 (characters).
cpd.lab.offset
numeric, specifying how much compound labels should be put above the default position or node center. This argument is useful when map.cpdname=TRUE, i.e. compounds are labelled by full name, which affects the look of compound nodes and color. Only effective when kegg.native=FALSE. Default cpd.lab.offset=1.0.
Details
Pathview maps and renders user data on relevant pathway graphs. Pathview is a stand alone program for pathway based data integration and visualization. It also seamlessly integrates with pathway and functional analysis tools for large-scale and fully automated analysis. Pathview provides strong support for data integration. It works with: 1) essentially all types of biological data mappable to pathways, 2) over 10 types of gene or protein IDs, and 20 types of compound or metabolite IDs, 3) pathways for over 2000 species as well as KEGG orthology, 4) various data attributes and formats, i.e. continuous/discrete data, matrices/vectors, single/multiple samples etc. To see mappable external gene/protein IDs do: data(gene.idtype.list), to see mappable external compound related IDs do: data(rn.list); names(rn.list). Pathview generates both native KEGG view and Graphviz views for pathways. Currently only KEGG pathways are implemented. Hopefully, pathways from Reactome, NCI and other databases will be supported in the future.
Value
From version 1.9.3, pathview can accept either a single pathway or multiple pathway ids. The result returned by pathview function is a named list corresponding to the input pathway ids. Each element (for each pathway) is itself a named list, with 2 elements ("plot.data.gene", "plot.data.cpd"). Both elements are data.frame or NULL depending on the corresponding input data gene.data and cpd.data. These data.frames record the plot data for mapped gene or compound nodes: rows are mapped genes/compounds, columns are:
kegg.names
standard KEGG IDs/Names for mapped nodes. It's Entrez Gene ID or KEGG Compound Accessions.
labels
Node labels to be used when needed.
all.mapped
All molecule (gene or compound) IDs mapped to this node.
type
node type, currently 4 types are supported: "gene","enzyme", "compound" and "ortholog".
x
x coordinate in the original KEGG pathway graph.
y
y coordinate in the original KEGG pathway graph.
width
node width in the original KEGG pathway graph.
height
node height in the original KEGG pathway graph.
other columns
columns of the mapped gene/compound data and corresponding pseudo-color codes for individual samples
The results returned by keggview.native and keggview.graph are both a list of graph plotting parameters. These are not intended to be used externally.
https://mp.weixin.qq.com/s/5k6YCTsIiBsV1Mg0qyXxwA