# Basic Structure ----
library(ggplot2)
# A ggplot is assembled from three pieces:
#   1. data       - the data frame to draw from
#   2. aesthetics - which columns drive which visual properties
#   3. geom       - the mark used to draw the observations
ggplot(mpg, mapping = aes(displ, hwy)) +
  geom_point()

# Keep the base plot in a variable, then add layers to it step by step
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point() +
  geom_smooth()
# Geoms — One Variable ----
# Continuous variable: histogram with 20 bins
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_histogram(fill = "#e04a2f", bins = 20)

# Continuous variable: density curve with a half-transparent fill
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_density(alpha = 0.5, fill = "steelblue")

# Discrete variable: bar chart of the levels of `class`
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(fill = "#e04a2f")
# Geoms — Two Variables ----
# Scatter plot, points coloured by `class`
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class), size = 2)

# Line chart (time series)
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line(linewidth = 0.8, color = "#e04a2f")

# Bar chart from pre-computed values.
# `df` is not defined in the visible code: supply your own data frame with
# `category` and `value` columns.
ggplot(data = df, mapping = aes(x = category, y = value)) +
  geom_col(fill = "steelblue")

# Boxplot of `hwy` for each `class`
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot(fill = "#fde8e4")

# Violin with the individual observations jittered on top
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_violin(alpha = 0.3, fill = "#e04a2f") +
  geom_jitter(size = 0.8, width = 0.15)

# Area chart
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_area(alpha = 0.4, fill = "#e04a2f")

# Heatmap / tile: fill colour driven by `value`.
# `df` is again a placeholder; it needs `x_var`, `y_var` and `value` columns.
ggplot(data = df, mapping = aes(x = x_var, y = y_var, fill = value)) +
  geom_tile()
# Aesthetics ----
# Map data to visual properties inside aes()
# NOTE: the call below is a schematic list of the common aesthetic names, not
# a ready-to-use mapping. In real code each aesthetic other than x and y is
# supplied by name, e.g. aes(color = class), as in the geom_point() example
# further down.
aes(x, y, color, fill, size, shape,
alpha, linetype, group)
# Set fixed values OUTSIDE aes()
# (a constant: every point gets the same colour and size)
geom_point(color = "red", size = 3)
# Map variable to aesthetic INSIDE aes()
# (colour follows the `class` column, size follows the `cyl` column)
geom_point(aes(color = class, size = cyl))
# Common shapes: 0-25 (16=filled circle)
# Common linetypes: "solid","dashed",
# "dotted","dotdash","longdash"
# Facets ----
# One panel per level of a single variable, wrapped into three columns
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(facets = ~class, ncol = 3)

# Panel grid from two variables: `drv` in rows, `cyl` in columns
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ cyl)

# Free scales: each panel gets its own y-axis range
facet_wrap(facets = ~class, scales = "free_y")
# Scales ----
# Axis limits and breaks: x axis from 0 to 50 with a break every 10
scale_x_continuous(breaks = seq(0, 50, by = 10), limits = c(0, 50))

# Log scale on the y axis
scale_y_log10()

# Color / fill
# Manual palette: a named vector assigns one colour per level
scale_color_manual(values = c(a = "#e04a2f", b = "steelblue"))

# ColorBrewer palette for fills
scale_fill_brewer(palette = "Set2")

# Two-colour gradient
scale_color_gradient(low = "white", high = "red")

# Viridis palette for discrete values (colorblind-safe)
scale_color_viridis_d()
# Labels & Annotations ----
# Titles, axis labels, legend title and caption are all set through labs().
# `color = class` is mapped on the points so that the `color` label below has
# a legend to name; without a mapped colour aesthetic that label is unused.
#
# Every `+` sits at the END of a line: R only continues an expression when the
# operator is trailing. A line that begins with `+` is evaluated on its own,
# so the annotations would never be added to the plot.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  labs(
    title = "Engine Size vs. Fuel Efficiency",
    subtitle = "EPA data for 234 cars",
    x = "Displacement (L)",
    y = "Highway MPG",
    color = "Vehicle Class",
    caption = "Source: fueleconomy.gov"
  ) +
  # Add text annotations (positions are in data coordinates)
  annotate(
    "text",
    x = 5, y = 40,
    label = "Outlier region", color = "red"
  ) +
  # Shaded rectangle marking the same region
  annotate(
    "rect",
    xmin = 4, xmax = 6,
    ymin = 35, ymax = 45, alpha = 0.1
  )
# Themes ----
# A theme is added to a plot with `+`. A line that starts with `+` is not a
# continuation in R; it is evaluated on its own and fails, so every example
# here is written as a complete expression.
# `p` is the stored base plot, p <- ggplot(mpg, aes(displ, hwy)); points are
# added so that each theme has something to display.
scatter <- p + geom_point()

# Built-in themes
scatter + theme_minimal() # clean, no boxes
scatter + theme_bw() # white bg, grid
scatter + theme_classic() # axes only
scatter + theme_void() # blank (maps)

# Custom tweaks
scatter +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  )
# Advanced Geoms & Coordinates ----
# The bare geom_*/stat_* calls in this section are layers: add them to a plot
# with a trailing `+`, e.g. p + geom_point() + geom_smooth(method = "lm").

# Trend line
geom_smooth(method = "lm", se = TRUE)
geom_smooth(method = "loess", span = 0.5)

# Summary statistics: draw the mean of y at each x as a point
stat_summary(fun = mean, geom = "point", size = 3)

# Flip coordinates.
# Written as a complete expression: a line that begins with `+` is evaluated
# on its own in R and fails instead of extending a plot.
ggplot(mpg, aes(class)) +
  geom_bar() +
  coord_flip()

# Dodged bars (grouped).
# `df` is a placeholder not defined in the visible code; it needs `group`,
# `value` and `cat` columns.
ggplot(df, aes(group, value, fill = cat)) +
  geom_col(position = position_dodge(width = 0.9))

# Stacked bars
geom_col(position = "stack") # default
geom_col(position = "fill") # 100%
# Saving Plots ----
# Save the last plot (no `plot` argument is given)
ggsave(filename = "plot.png", dpi = 300, width = 8, height = 5)

# Save a specific stored plot object, here `p`
ggsave(filename = "plot.pdf", plot = p, height = 6, width = 10)

# Common formats: png, pdf, svg, jpg