Closed ixxmu closed 2 years ago
在组学分析中,一般先通过降维算法得到低维度(如二维或三维)的新坐标数据,再结合可视化技术展示样本在新坐标下的空间分布,最后辅以统计检验结果,证实整体组学水平上组间的差异性。降维算法既有基于线性模型的PCA,也有基于非线性模型的tSNE和UMAP等方法。
点赞、在看本文,分享至朋友圈集赞20个并保留30分钟,截图发至微信mzbj0002领取。
「木舟笔记2022年度VIP可免费领取」。
「权益:」
「2022」年度木舟笔记所有推文示例数据及代码(「在VIP群里实时更新」)。
木舟笔记「科研交流群」。
「半价」购买跟着Cell学作图系列合集
(免费教程+代码领取)|跟着Cell学作图系列合集。
「收费:」
「99¥/人」。可添加微信:mzbj0002
转账,或直接在文末打赏。
主成分分析(Principal Component Analysis,PCA)是最常用的无监督降维方法之一。
# Load the packages used by the PCA example.
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes the user's own objects while leaving attached packages and options
# untouched, so it does not produce a clean session anyway.
library(tidyverse)
library(broom)
library(palmerpenguins)

# Example data: Palmer penguins with incomplete rows and the `year` column
# removed. Reading from the package namespace keeps this step re-runnable even
# when an already-modified `penguins` copy exists in the global environment.
penguins <- palmerpenguins::penguins %>%
  drop_na() %>%
  select(-year)
head(penguins)
# Run the PCA with prcomp().
# The numeric columns are standardised with scale() beforehand so that no
# variable dominates merely because of its measurement unit.
pca_fit <- prcomp(scale(select(penguins, where(is.numeric))))

# Inspect the importance of each principal component.
summary(pca_fit)
# Plot the samples on PC1 vs PC2.
# augment() attaches the component scores to the original rows as columns
# prefixed with ".fitted"; the prefix is stripped so the columns can be
# referred to as PC1, PC2, ...
# rename_with() replaces the superseded rename_at(), and fixed() makes the
# leading dot a literal character instead of a regex wildcard.
pca_fit %>%
  augment(penguins) %>%
  rename_with(
    ~ str_remove(.x, fixed(".fitted")),
    starts_with(".fitted")
  ) %>%
  ggplot(aes(x = PC1, y = PC2, color = species, shape = sex)) +
  geom_point()
## UMAP
# Load the packages used by the UMAP example.
# The workspace is deliberately not wiped with rm(list = ls()); the data is
# read from the package namespace instead, so this section is re-runnable
# regardless of what `penguins` currently is in the global environment.
library(tidyverse)
library(palmerpenguins)
# install.packages("umap")
library(umap)
theme_set(theme_bw(18))

# Complete cases only, without `year`, plus a row identifier used later to
# re-attach the metadata to the embedding.
penguins <- palmerpenguins::penguins %>%
  drop_na() %>%
  select(-year) %>%
  mutate(ID = row_number())

# Non-numeric sample annotations, keyed by ID.
penguins_meta <- penguins %>%
  select(ID, species, island, sex)
# Fix the RNG seed before fitting so the embedding can be reproduced.
set.seed(142)
umap_fit <- local({
  # ID is numeric, so it is selected together with the measurements; it is
  # moved into the row names so that it is not used as a feature.
  feature_tbl <- select(penguins, where(is.numeric))
  feature_mat <- scale(column_to_rownames(feature_tbl, "ID"))
  umap(feature_mat)
})
# Turn the embedding coordinates into a data frame and re-attach the sample
# metadata. IDs are rebuilt by row position, which relies on the layout rows
# being in the same order as the input rows.
umap_df <- inner_join(
  as.data.frame(umap_fit$layout) %>%
    rename(UMAP1 = V1, UMAP2 = V2) %>%
    mutate(ID = seq_len(n())),
  penguins_meta,
  by = "ID"
)
head(umap_df)
# Scatter plot of the UMAP embedding, coloured by species and shaped by sex.
ggplot(umap_df, aes(UMAP1, UMAP2, colour = species, shape = sex)) +
  geom_point() +
  labs(subtitle = "UMAP plot", x = "UMAP1", y = "UMAP2")
# Same embedding, one panel per island.
ggplot(umap_df, aes(UMAP1, UMAP2, colour = species)) +
  geom_point(alpha = 0.5, size = 3) +
  facet_wrap(vars(island)) +
  labs(subtitle = "UMAP plot", x = "UMAP1", y = "UMAP2") +
  theme(legend.position = "bottom")
# Circle the outlying samples.
library(ggforce)

# Centre and radius of the highlight circle, in embedding coordinates.
# NOTE(review): these values are hard-coded for one particular embedding; a
# different seed or umap version will likely need different coordinates.
outlier_circle <- data.frame(x0 = -5, y0 = -3.8, r = 0.65)

umap_df %>%
  ggplot(aes(x = UMAP1, y = UMAP2, color = species, shape = sex)) +
  geom_point() +
  labs(x = "UMAP1", y = "UMAP2", subtitle = "UMAP plot") +
  # The circle gets its own one-row data frame. With constants inside aes()
  # and the plot data inherited, the same circle would be drawn once for
  # every row of umap_df.
  geom_circle(
    data = outlier_circle,
    aes(x0 = x0, y0 = y0, r = r),
    color = "green",
    inherit.aes = FALSE
  )
## tSNE
# Load the packages used by the tSNE example.
# The workspace is deliberately not wiped with rm(list = ls()); the data is
# read from the package namespace instead, so this section is re-runnable
# regardless of what `penguins` currently is in the global environment.
library(tidyverse)
library(palmerpenguins)
library(Rtsne)
theme_set(theme_bw(18))

# Complete cases only, without `year`, plus a row identifier used later to
# re-attach the metadata to the embedding.
penguins <- palmerpenguins::penguins %>%
  drop_na() %>%
  select(-year) %>%
  mutate(ID = row_number())

# Non-numeric sample annotations, keyed by ID.
penguins_meta <- penguins %>%
  select(ID, species, island, sex)
# Fix the RNG seed before fitting so the embedding can be reproduced.
set.seed(142)
tSNE_fit <- local({
  # ID is numeric, so it is selected together with the measurements; it is
  # moved into the row names so that it is not used as a feature.
  feature_tbl <- select(penguins, where(is.numeric))
  feature_mat <- scale(column_to_rownames(feature_tbl, "ID"))
  Rtsne(feature_mat)
})
# Turn the tSNE coordinates into a data frame and re-attach the sample
# metadata. IDs are rebuilt by row position, which relies on the rows of Y
# being in the same order as the input rows.
tSNE_df <- as.data.frame(tSNE_fit$Y) %>%
  rename(tSNE1 = V1, tSNE2 = V2) %>%
  mutate(ID = seq_len(n())) %>%
  inner_join(penguins_meta, by = "ID")
head(tSNE_df)
# Scatter plot of the tSNE embedding, coloured by species and shaped by sex,
# with the legend placed below the panel.
ggplot(tSNE_df, aes(tSNE1, tSNE2, colour = species, shape = sex)) +
  geom_point() +
  theme(legend.position = "bottom")
https://mp.weixin.qq.com/s/VWfMo_FGNkolKSHNC8a6IQ