Open woneuy01 opened 4 years ago
You can associate a dataset x with a ggplot object using any of these three commands: `ggplot(data = x)`, `ggplot(x)`, or `x %>% ggplot()`.
In ggplot2, graphs are created by adding layers to the ggplot object: `DATA %>% ggplot() + LAYER_1 + LAYER_2 + ... + LAYER_N`. The geometry layer defines the plot type and takes the format `geom_X`, where X is the plot type. Aesthetic mappings describe how properties of the data connect with features of the graph (axis position, color, size, etc.). Define aesthetic mappings with the `aes` function. `aes` uses variable names from the object component (for example, `total` rather than `murders$total`). `geom_point` creates a scatterplot and requires x and y aesthetic mappings. `geom_text` and `geom_label` add text to a scatterplot and require x, y, and label aesthetic mappings.
# Code: Adding layers to a plot
library(tidyverse)
library(dslabs)
data(murders)

# Scatterplot of total murders against population in millions.
murders %>%
  ggplot() +
  geom_point(aes(x = population / 10^6, y = total))

# Keep the ggplot object in p so layers can be added to it step by step.
p <- ggplot(data = murders)
p + geom_point(aes(population / 10^6, total))

# Add the state abbreviation as a text label for every point.
p +
  geom_point(aes(population / 10^6, total)) +
  geom_text(aes(population / 10^6, total, label = abb))

# size is given outside aes, so the same value applies to every point.
p +
  geom_point(aes(population / 10^6, total), size = 3) +
  geom_text(aes(population / 10^6, total, label = abb))

# nudge_x shifts the labels horizontally away from the points.
p +
  geom_point(aes(population / 10^6, total), size = 3) +
  geom_text(aes(population / 10^6, total, label = abb), nudge_x = 1)

# Since the aes is defined globally in p, there is no need to repeat aes
# in geom_point and geom_text.
p <- murders %>%
  ggplot(aes(population / 10^6, total, label = abb))
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 1.5)

# An aes given inside a layer replaces the global mapping for that layer only.
p +
  geom_point(size = 3) +
  geom_text(aes(x = 10, y = 800, label = "Hello there!"))
# Code: Log-scale the x- and y-axis
library(tidyverse)
library(dslabs)
data(murders)

p <- murders %>%
  ggplot(aes(population / 10^6, total, label = abb))

# Log base 10 scales through the trans argument of the continuous scales.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10")

# The same transformation using the dedicated log10 scale functions.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.075) +
  scale_x_log10() +
  scale_y_log10()

# Add axis labels and a title.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.075) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Population in millions (log scale)") +
  ylab("Total number of murders (log scale)") +
  ggtitle("US Gun Murders in 2010")

# Redefine p as everything except the points layer, so that different
# point layers can be tried on top of it.
p <- murders %>%
  ggplot(aes(population / 10^6, total, label = abb)) +
  geom_text(nudge_x = 0.075) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Population in millions (log scale)") +
  ylab("Total number of murders (log scale)") +
  ggtitle("US Gun Murders in 2010")

# All points blue: color is set outside aes.
p + geom_point(size = 3, color = "blue")

# Color mapped to region inside aes; col is color.
p + geom_point(aes(col = region), size = 3)
# Code: Add a line with average murder rate
# r is sum(total) / sum(population) * 10^6 computed over all of murders.
r <- pull(
  summarize(murders, rate = sum(total) / sum(population) * 10^6),
  rate
)

# Line with intercept log10(r); slope is default of 1.
p +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r))

# Same line, dashed and dark grey, added before the points layer.
p +
  geom_abline(intercept = log10(r), lty = 2, color = "darkgrey") +
  geom_point(aes(col = region), size = 3)
# Code: Change legend title
p <- p + scale_color_discrete(name = "Region")  # capitalize legend title

# Apply the dslabs theme.
library(dslabs)
ds_theme_set()

# Themes from ggthemes. Each call is on its own line so that the trailing
# comment of the first does not swallow the second.
library(ggthemes)
p + theme_economist()        # style of the Economist magazine
p + theme_fivethirtyeight()  # style of the FiveThirtyEight website
# Putting it all together: the complete murders plot.
library(tidyverse)
library(ggrepel)
library(ggthemes)
library(dslabs)
data(murders)

# r is sum(total) / sum(population) * 10^6 computed over all of murders.
r <- murders %>%
  summarize(rate = sum(total) / sum(population) * 10^6) %>%
  pull(rate)

murders %>%
  ggplot(aes(population / 10^6, total, label = abb)) +
  geom_abline(intercept = log10(r), lty = 2, color = "darkgrey") +
  geom_point(aes(col = region), size = 3) +
  # geom_text_repel() is used when text labels are too close or crowded.
  geom_text_repel() +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Population in millions (log scale)") +
  ylab("Total number of murders (log scale)") +
  ggtitle("US Gun Murders in 2010") +
  scale_color_discrete(name = "Region") +
  theme_economist()
# Code: Histograms in ggplot2
library(tidyverse)
library(dslabs)
data(heights)

# Base plot object: male heights on the x-axis.
p <- heights %>%
  filter(sex == "Male") %>%
  ggplot(aes(x = height))

# Histogram with the default bins, then with a bin width of 1.
p + geom_histogram()
p + geom_histogram(binwidth = 1)

# col = "black" means the outline of each bar is black.
p +
  geom_histogram(binwidth = 1, fill = "blue", col = "black") +
  xlab("Male heights in inches") +
  ggtitle("Histogram")

# Density plots, without and with a fill color.
p + geom_density()
p + geom_density(fill = "blue")
# QQ plot of male heights; geom_qq takes the data through the sample aesthetic.
p <- ggplot(filter(heights, sex == "Male"), aes(sample = height))
p + geom_qq()

# Mean and sd of male heights, passed to geom_qq as distribution parameters.
params <- summarize(
  filter(heights, sex == "Male"),
  mean = mean(height),
  sd = sd(height)
)
p +
  geom_qq(dparams = params) +
  geom_abline()
# QQ plot of the scaled heights with a reference line from geom_abline().
# The original was missing the closing parenthesis of ggplot(), so the
# expression never terminated.
heights %>%
  ggplot(aes(sample = scale(height))) +
  geom_qq() +
  geom_abline()
# Build three histograms of male heights that differ only in bin width,
# then show them side by side.
p <- heights %>%
  filter(sex == "Male") %>%
  ggplot(aes(x = height))
p1 <- p + geom_histogram(binwidth = 1, fill = "blue", col = "black")
p2 <- p + geom_histogram(binwidth = 2, fill = "blue", col = "black")
p3 <- p + geom_histogram(binwidth = 3, fill = "blue", col = "black")

library(gridExtra)
grid.arrange(p1, p2, p3, ncol = 3)  # one row, three columns
To create a scatter plot, we add a layer with the function `geom_point`. The aesthetic mappings require us to define the x-axis and y-axis variables, respectively. So the code looks like this: `murders %>% ggplot(aes(x = , y = )) + geom_point()`, except that we have to fill in the blanks to define the two variables x and y.
library(dplyr)
library(ggplot2)
library(dslabs)
data(murders)

# Draw each state as a label (its abbreviation) instead of a point.
murders %>%
  ggplot(aes(population, total, label = abb)) +
  geom_label()

# All labels blue: color is set outside aes.
murders %>%
  ggplot(aes(population, total, label = abb)) +
  geom_label(color = "blue")

# Label color mapped to region inside aes.
murders %>%
  ggplot(aes(population, total, label = abb, color = region)) +
  geom_label()

# Store the labelled plot, then add log10 scales and a title.
p <- murders %>%
  ggplot(aes(population, total, label = abb, color = region)) +
  geom_label()
p +
  scale_x_log10() +
  scale_y_log10() +
  ggtitle("Gun murder data")
# Density of height with sex as the group aesthetic.
ggplot(heights, aes(x = height, group = sex)) +
  geom_density()

# Density of height with sex mapped to the line color.
ggplot(heights, aes(x = height, color = sex)) +
  geom_density()

# Density of height with sex mapped to the fill color.
ggplot(heights, aes(x = height, fill = sex)) +
  geom_density()

# Same fill mapping, with alpha = 0.2 on the density layer.
ggplot(heights, aes(x = height, fill = sex)) +
  geom_density(alpha = 0.2)
One limitation is that ggplot is designed to work exclusively with data tables. In these data tables, rows have to be observations and columns have to be variables.