Columns become rows. Use when variables are stored as column names.
# BEFORE (wide) # name math sci # Ali 88 92 # Bo 79 85→
# AFTER (long) # name subject score # Ali math 88 # Ali sci 92 # Bo math 79 # Bo sci 85
library(tidyr)
wide_df |>
pivot_longer(
cols = math:sci, # columns to pivot
names_to = "subject", # new key column
values_to = "score" # new value column
)
# Selecting columns
pivot_longer(cols = -name) # all except name
pivot_longer(cols = starts_with("q"))
pivot_longer(cols = where(is.numeric))
# Parse column names with patterns
# Cols: bp_before, bp_after, hr_before, hr_after
df |> pivot_longer(
cols = -id,
names_to = c("measure", "time"),
names_sep = "_"
)
Rows become columns. Opposite of pivot_longer.
# BEFORE (long) # name subject score # Ali math 88 # Ali sci 92 # Bo math 79 # Bo sci 85→
# AFTER (wide) # name math sci # Ali 88 92 # Bo 79 85
long_df |>
pivot_wider(
names_from = subject, # column for names
values_from = score # column for values
)
# Fill missing values
pivot_wider(
names_from = subject,
values_from = score,
values_fill = 0 # NA -> 0
)
# Multiple value columns
pivot_wider(
names_from = year,
values_from = c(revenue, profit)
)
# BEFORE # date # 2024-03-15 # 2024-07-22→
# AFTER # year month day # 2024 03 15 # 2024 07 22
df |>
separate(
col = date,
into = c("year", "month", "day"),
sep = "-"
)
# Keep original column
separate(date, into = c("y","m","d"),
sep = "-", remove = FALSE)
# Convert types automatically
separate(date, into = c("y","m","d"),
sep = "-", convert = TRUE)
# By position (character index)
separate(code, into = c("prefix","num"),
sep = 3) # split after 3rd char
# Newer alternative: separate_wider_delim()
df |> separate_wider_delim(
date, delim = "-",
names = c("year", "month", "day")
)
# BEFORE # year month day # 2024 03 15 # 2024 07 22→
# AFTER # date # 2024-03-15 # 2024-07-22
df |>
unite(
col = "date", # new column name
year, month, day, # columns to combine
sep = "-" # separator
)
# Keep original columns
unite("full_name", first, last,
sep = " ", remove = FALSE)
# Drop rows with any NA df |> drop_na() # Drop rows where specific columns are NA df |> drop_na(score, grade) # Replace NA with a value df |> replace_na(list( score = 0, grade = "Unknown" )) # Fill NA with previous/next value # quarter revenue # Q1 100 # NA 120 <- should be Q1 # Q2 90 df |> fill(quarter, .direction = "down") # .direction: "down", "up", "downup", "updown" # Complete: make implicit NAs explicit df |> complete(name, year) # Ensures every name x year combination # exists, filling missing with NA df |> complete(name, year, fill = list(val=0))
# Nest: collapse rows into list-columns
nested <- mtcars |>
group_by(cyl) |>
nest()
# Result: tibble with 3 rows (4,6,8 cyl)
# each row has a "data" list-column holding
# the subset data frame
# Useful with purrr for models per group
library(purrr)
nested |>
mutate(
model = map(data, ~ lm(mpg ~ wt, data=.x)),
rsq = map_dbl(model, ~ summary(.x)$r.squared)
)
# Unnest: expand list-columns back to rows
nested |> unnest(cols = data)
# Unnest a column of vectors
tibble(
group = c("a", "b"),
values = list(1:3, 4:5)
) |> unnest(values)
# group values
# a 1
# a 2
# a 3
# b 4
# b 5
# Survey data: one row per student per Q
survey_wide |>
pivot_longer(q1:q20,
names_to = "question",
names_prefix = "q", # strip "q"
names_transform = as.integer,
values_to = "response")
# Summary table for reporting
results |>
group_by(region, year) |>
summarise(revenue = sum(rev)) |>
pivot_wider(
names_from = year,
values_from = revenue
)
# Correlation-ready format
df |>
pivot_wider(
id_cols = date,
names_from = stock,
values_from = price
)