Closed ixxmu closed 2 years ago
最远的距离是想和做
ggplot 给我们提供了很多基础的图形及统计函数,有时候可能我们需要根据自的特殊需求来统计分析作图,那么对于每次书写繁杂的代码显然是不太方便。好在ggplot 提供了可以构建自己的 geom 或者 stat 函数来满足不同场景的需求。
接下来跟着 Mastering Software Development in R 这本书里的相关内容介绍,如何构建自己的图层。
主要分为两步:
ggproto()
函数用于构造一个与 geom 对应的新类。这个新类指定了许多属性和函数,用于描述如何在图形上绘制数据。geom
函数构造为正则函数。这个函数返回一个可以添加到使用 ggplot()函数创建的图形中的层。
ggproto 基本结构:
GeomNEW <- ggproto("GeomNEW", Geom,
required_aes = <a character vector of required aesthetics>,
default_aes = aes(<default values for certain aesthetics>),
draw_key = <a function used to draw the key in the legend>,
draw_panel = function(data, panel_scales, coord) {
## Function that returns a grid grob that will
## be plotted (this is where the real work occurs)
}
)
data
,panel_scales/panel_params
及coord
三个参数,还需要基础的 grid 包的绘图知识,展示如何绘制图形。下面构建一个简单的点图层:
library(ggplot2)
library(grid)
# ggproto
GeomMyPoint <- ggproto("GeomMyPoint", Geom,
required_aes = c("x", "y"),
default_aes = aes(shape = 1),
draw_key = draw_key_point,
draw_panel = function(data, panel_scales, coord) {
## Transform the data first
coords <- coord$transform(data, panel_scales)
## Let's print out the structure of the 'coords' object
str(coords)
## Construct a grid grob
pointsGrob(
x = coords$x,
y = coords$y,
pch = coords$shape
)
})
# geom
geom_mypoint <- function(mapping = NULL, data = NULL, stat = "identity",
position = "identity", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
ggplot2::layer(
geom = GeomMyPoint, mapping = mapping,
data = data, stat = stat, position = position,
show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
我们测试绘图看看:
ggplot(mtcars,aes(x = mpg,y = disp)) +
geom_mypoint()
# 'data.frame': 32 obs. of 5 variables:
# $ x : num 0.456 0.456 0.525 0.471 0.367 ...
# $ y : num 0.247 0.247 0.129 0.469 0.701 ...
# $ PANEL: Factor w/ 1 level "1": 1 1 1 1 1 1 1 1 1 1 ...
# $ group: int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
# $ shape: num 1 1 1 1 1 1 1 1 1 1 ...
str(coords) 可以看到经过转换的坐标及对应的映射属性信息,坐标已经转换为 grid 里的 0-1 范围内。
我们再看个例子:
GeomAutoTransparent <- ggproto("GeomAutoTransparent", Geom,
required_aes = c("x", "y"),
default_aes = aes(shape = 19),
draw_key = draw_key_point,
draw_panel = function(data, panel_scales, coord) {
## Transform the data first
coords <- coord$transform(data, panel_scales)
## Compute the alpha transparency factor based on the
## number of data points being plotted
n <- nrow(data)
if(n > 100 && n <= 200)
coords$alpha <- 0.3
else if(n > 200)
coords$alpha <- 0.15
else
coords$alpha <- 1
## Construct a grid grob
grid::pointsGrob(
x = coords$x,
y = coords$y,
pch = coords$shape,
gp = grid::gpar(alpha = coords$alpha)
)
})
geom_transparent <- function(mapping = NULL, data = NULL, stat = "identity",
position = "identity", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
ggplot2::layer(
geom = GeomAutoTransparent, mapping = mapping,
data = data, stat = stat, position = position,
show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
可以看到,在 对根据数据数量计算不同的透明度, 然后再作图:
library(faraway)
ggplot(data = worldcup, aes(Time, Shots)) +
geom_transparent() +
facet_wrap(~ Position, ncol = 2)
构建 Stat 可以用来进行更多复杂的计算,基本结构如下:
StatNEW <- ggproto("StatNEW", Stat,
compute_group = <a function that does computations>,
default_aes = aes(<default values for certain aesthetics>),
required_aes = <a character vector of required aesthetics>)
compute_group 是主要用来如何对数据进行计算的,很重要。
看看数据:
library(datasets)
library(dplyr)
data("airquality")
monthly <- dplyr::group_by(airquality, Month) %>%
dplyr::summarize(ozone = mean(Ozone, na.rm = TRUE),
stderr = sd(Ozone, na.rm = TRUE) / sqrt(sum(!is.na(Ozone))))
monthly
# A tibble: 5 x 3
Month ozone stderr
<int> <dbl> <dbl>
1 5 23.6 4.36
2 6 29.4 6.07
3 7 59.1 6.20
4 8 60.0 7.78
5 9 31.4 4.48
散点图:
ggplot(monthly, aes(x = Month, y = ozone)) +
geom_point() +
ylab("Ozone (ppb)")
我们通过计算的标准误差来计算绘制置信区间线段的坐标:
StatConfint <- ggproto("StatConfint", Stat,
compute_group = function(data, scales) {
## Compute the line segment endpoints
x <- data$x
xend <- data$x
y <- data$y - 1.96 * data$stderr
yend <- data$y + 1.96 * data$stderr
## Return a new data frame
data.frame(x = x, xend = xend,
y = y, yend = yend)
},
required_aes = c("x", "y", "stderr")
)
然后构建 segment 图层的函数:
stat_confint <- function(mapping = NULL, data = NULL, geom = "segment",
position = "identity", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatConfInt,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
使用:
ggplot(data = monthly, aes(x = Month, y = ozone, stderr = stderr)) +
geom_point() +
ylab("Ozone (ppb)") +
geom_segment(stat = "confint")
有时候你 需要特殊的计算数据,然后使用绘制特殊的图形, 显然单个 geom 或者 stat 并不能满足需要,此时你需要构建一个 stat 和一个 geom 结合起来来分别进行计算数据和绘制特殊图形。
下面示例图形:
## This code is not runnable yet!
library(ggplot2)
library(datasets)
data(airquality)
mutate(airquality, Month = factor(Month)) %>%
ggplot(aes(Month, Ozone)) +
geom_skinnybox()
图形说明:
The “whiskers” extend to the minimum and the maximum of the data. The medians are represented by a point rather than a line. There is no box indicating the region between the 25th and 75th percentiles.
构建 stat:
StatSkinnybox <- ggproto("StatSkinnybox", Stat,
compute_group = function(data, scales) {
probs <- c(0, 0.25, 0.5, 0.75, 1)
qq <- quantile(data$y, probs, na.rm = TRUE)
out <- qq %>% as.list %>% data.frame
names(out) <- c("ymin", "lower", "middle",
"upper", "ymax")
out$x <- data$x[1]
out
},
required_aes = c("x", "y")
)
stat_skinnybox <- function(mapping = NULL, data = NULL, geom = "skinnybox",
position = "identity", show.legend = NA,
outliers = TRUE, inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatSkinnybox,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(outliers = outliers, ...)
)
}
构建 geom:
library(scales)
draw_panel_function <- function(data, panel_scales, coord) {
coords <- coord$transform(data, panel_scales) %>%
mutate(lower = rescale(lower, from = panel_scales$y.range),
upper = rescale(upper, from = panel_scales$y.range),
middle = rescale(middle, from = panel_scales$y.range))
med <- pointsGrob(x = coords$x,
y = coords$middle,
pch = coords$shape)
lower <- segmentsGrob(x0 = coords$x,
x1 = coords$x,
y0 = coords$ymin,
y1 = coords$lower,
gp = gpar(lwd = coords$size))
upper <- segmentsGrob(x0 = coords$x,
x1 = coords$x,
y0 = coords$upper,
y1 = coords$ymax,
gp = gpar(lwd = coords$size))
gTree(children = gList(med, lower, upper))
}
GeomSkinnybox <- ggproto("GeomSkinnybox", Geom,
required_aes = c("x", "ymin", "lower", "middle",
"upper", "ymax"),
default_aes = aes(shape = 19, lwd = 2),
draw_key = draw_key_point,
draw_panel = draw_panel_function
)
然后注意这里的 stat = "skinnybox":
geom_skinnybox <- function(mapping = NULL, data = NULL, stat = "skinnybox",
position = "identity", show.legend = NA,
na.rm = FALSE, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomSkinnybox,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
这一块还是很难的,网上教程也不是很多,需要好好理解,会用就行。下一节讲讲如何构建自己的映射问题。
欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群
(微信交流群需收取20元入群费用(防止骗子和便于管理)
)。
老俊俊微信:
知识星球:
所以今天你学习了吗?
最后欢迎大家分享转发,您的点赞是对我的鼓励和肯定!
如果觉得对您帮助很大,赏杯快乐水喝喝吧!
往期回顾
◀...
https://mp.weixin.qq.com/s/bhopxUN7saUFc2gnmYZAJA