# Data Types ----
# Create one value of each basic type, then inspect and convert between types.
x <- 42 # numeric (double) — plain number literals are doubles
y <- 3L # integer — the L suffix requests an integer
s <- "hello" # character
b <- TRUE # logical
f <- factor(c("a","b","a")) # factor with levels "a" and "b"
class(x) # "numeric"
is.numeric(x) # TRUE (also TRUE for integers such as y)
as.character(42) # "42"
as.numeric("3.14") # 3.14
# Vectors ----
# Build, summarise and index atomic vectors; result comments refer to v below.
v <- c(4, 7, 2, 9, 1)
seq(1, 10, by = 2) # 1 3 5 7 9
seq_len(5) # 1 2 3 4 5
rep(c(1, 2), times = 3) # 1 2 1 2 1 2 (whole vector repeated)
rep(c(1, 2), each = 3) # 1 1 1 2 2 2 (each element repeated)
length(v) # 5
sort(v) # 1 2 4 7 9 (returns a sorted copy; v is unchanged)
rev(v) # 1 9 2 7 4
unique(c(1,1,2,3)) # 1 2 3
table(c("a","b","a")) # counts per value: a = 2, b = 1
v[2] # 7 (1-indexed!)
v[c(1,3)] # 4 2
v[-2] # 4 2 9 1 (exclude 2nd)
v[v > 3] # 4 7 9 (logical mask keeps the TRUE positions)
# Data Frames ----
# Build a three-row, three-column data frame and inspect its shape and contents.
df <- data.frame(
name = c("Ali","Bo","Cat"),
score = c(88, 92, 79),
pass = c(TRUE, TRUE, FALSE)
)
nrow(df) # 3
ncol(df) # 3
dim(df) # 3 3 (rows, then columns)
str(df) # compact structure: one line per column with its type
summary(df) # descriptive stats per column
head(df, 2) # first 2 rows
names(df) # "name" "score" "pass"
# Subsetting ----
# Select rows and columns of df by position, name or condition, then modify it.
# By index: df[row, col] — an empty slot means "all"
df[1, ] # first row (still a data frame)
df[, 2] # second column (dropped to a plain vector)
df[1:2, c(1,3)] # rows 1-2, cols 1 & 3
# By name
df$score # score column as a vector
df[, "score"] # same thing (vector)
df["score"] # returns a one-column data frame
# Logical subsetting
df[df$score > 80, ] # rows whose score exceeds 80
which(df$score > 80) # indices: 1 2
subset(df, score > 80, select = c(name, score)) # filter rows and pick columns
# Modify
df$grade <- c("B+","A-","C+") # adds a new column
df$score[1] <- 90 # overwrites one cell (was 88)
# Functions ----
# Define a function
# bmi(): weight divided by height squared, rounded to one decimal place.
#   weight  numeric; presumably kilograms — confirm with callers
#   height  numeric; defaults to 1.70, presumably metres — confirm with callers
# Works element-wise on vectors; the value of the last expression is returned.
bmi <- function(weight, height = 1.70) {
  round(weight / height^2, digits = 1)
}
# Call bmi() with its default height, then with an explicit height
bmi(70) # 24.2
bmi(70, 1.80) # 21.6
# Anonymous function: defined inline and applied to each element
sapply(1:5, function(x) x^2) # 1 4 9 16 25
# Shorthand (R 4.1+): \(x) is equivalent to function(x)
sapply(1:5, \(x) x^2) # 1 4 9 16 25
# Control Flow ----
# if / else — the condition must be a single TRUE/FALSE, so test a scalar.
# `score` is defined here so the example runs on its own.
score <- 85
grade <- if (score >= 90) {
  "A"
} else if (score >= 80) {
  "B"
} else {
  "C"
} # grade is "B"
# ifelse() is the vectorised counterpart: one result per element
ifelse(c(45, 60, 88) >= 60, "pass", "fail") # "fail" "pass" "pass"
# for loop
total <- 0
for (i in 1:10) {
  total <- total + i
} # total is 55, same as sum(1:10)
# while loop
n <- 1
while (n <= 5) {
  cat(n, " ")
  n <- n + 1
}
# apply family — avoid loops!
sapply(df$score, \(x) x / 100) # one result per score
lapply(df[2:3], mean) # returns list (one mean per column)
vapply(df[2:3], mean, numeric(1)) # like sapply, but each result must be one number
tapply(df$score, df$pass, mean) # mean score within each pass group
mapply(function(a, b) a + b, 1:3, 10:12) # 11 13 15 (pairs the inputs element-wise)
# Math & Statistics ----
# Summary statistics on a sample vector, followed by scalar math helpers.
x <- c(12, 7, 3, 15, 9, 6, 11)
mean(x) # 9
median(x) # 9
sd(x) # 4.041... (sample standard deviation, n - 1 denominator)
var(x) # 16.33... (98 / 6)
sum(x) # 63
range(x) # 3 15
quantile(x, 0.25) # 6.5
min(x); max(x) # 3 and 15
cumsum(x) # running total: 12 19 22 37 46 52 63
diff(x) # successive diffs: -5 -4 12 -6 -3 5
round(3.456, 1) # 3.5
ceiling(3.2) # 4
floor(3.8) # 3
abs(-5) # 5
sqrt(16) # 4
log(100, 10) # 2 (second argument is the base)
# Strings ----
# Join, measure, transform, search and format character strings.
paste("Hello", "World") # "Hello World" (default separator is a space)
paste0("x", 1:3) # "x1" "x2" "x3" (no separator)
paste(c("a","b"), collapse=",") # "a,b" (collapse joins into one string)
s <- "R is great"
nchar(s) # 10
substr(s, 1, 1) # "R"
toupper(s) # "R IS GREAT"
tolower(s) # "r is great"
gsub("great", "fun", s) # "R is fun" (replaces every match)
sub("is", "was", s) # "R was great" (first match only)
grepl("great", s) # TRUE
grep("r", c("r","R","x")) # 1 (index; matching is case-sensitive)
strsplit("a-b-c", "-") # list holding one vector: "a" "b" "c"
trimws(" hi ") # "hi"
sprintf("%.2f%%", 3.1) # "3.10%" (%% prints a literal percent sign)