Base R Essentials

Data Types

# One example value per basic type; the trailing comment names the type.
x <- 42              # numeric (double) -- a bare number literal is a double
y <- 3L              # integer -- the L suffix makes the literal an integer
s <- "hello"         # character
b <- TRUE            # logical
f <- factor(c("a","b","a")) # factor with levels "a" and "b"

# Inspecting and converting types
class(x)             # "numeric"
is.numeric(x)        # TRUE (is.numeric is TRUE for integers as well as doubles)
as.character(42)     # "42"
as.numeric("3.14")   # 3.14

Vectors

# Building vectors
v <- c(4, 7, 2, 9, 1)
seq(1, 10, by = 2)       # 1 3 5 7 9
seq_len(5)                # 1 2 3 4 5
rep(c(1, 2), times = 3)  # 1 2 1 2 1 2 (whole vector repeated)
rep(c(1, 2), each = 3)   # 1 1 1 2 2 2 (each element repeated in place)

# Inspecting and rearranging (none of these modify v)
length(v)    # 5
sort(v)      # 1 2 4 7 9 (ascending)
rev(v)       # 1 9 2 7 4
unique(c(1,1,2,3)) # 1 2 3
table(c("a","b","a")) # counts per distinct value: a = 2, b = 1

# Indexing
v[2]         # 7  (1-indexed!)
v[c(1,3)]   # 4 2
v[-2]        # 4 2 9 1 (negative index excludes the 2nd element)
v[v > 3]     # 4 7 9 (logical mask keeps the elements where it is TRUE)

Data Frames

# Example data frame: three rows, with a text, a numeric and a logical column.
df <- data.frame(
  name = c("Ali","Bo","Cat"),
  score = c(88, 92, 79),
  pass = c(TRUE, TRUE, FALSE)
)

# Inspecting a data frame
nrow(df)     # 3
ncol(df)     # 3
dim(df)      # 3 3 (rows, then columns)
str(df)      # compact structure
summary(df)  # descriptive stats
head(df, 2)  # first 2 rows
names(df)    # "name" "score" "pass"

Subsetting

# By index: df[row, col] -- an empty slot means "all rows" / "all columns"
df[1, ]          # first row
df[, 2]          # second column (comes back as a plain vector)
df[1:2, c(1,3)] # rows 1-2, cols 1 & 3

# By name
df$score         # score column (a vector)
df[, "score"]    # same thing
df["score"]      # returns data frame (one column), not a vector

# Logical subsetting
df[df$score > 80, ]     # rows whose score exceeds 80
which(df$score > 80)    # indices: 1 2
# subset() filters rows and selects columns in a single call
subset(df, score > 80, select = c(name, score))

# Modify
df$grade <- c("B+","A-","C+")   # assigning to a new name adds a column
df$score[1] <- 90               # overwrite a single cell

Functions

# Define a function
# bmi(): weight divided by height squared, rounded to one decimal place.
#   weight  numeric weight value
#   height  numeric height value; defaults to 1.70 when omitted
# Units are not stated in the code; the example values look like kg and metres.
# The value of the last expression is what the function returns.
bmi <- function(weight, height = 1.70) {
  round(weight / height^2, 1)
}
bmi(70)          # 24.2
bmi(70, 1.80)   # 21.6

# Anonymous function: defined inline, never given a name
sapply(1:5, function(x) x^2)   # 1 4 9 16 25
# Shorthand (R 4.1+): \(x) is the same as function(x)
sapply(1:5, \(x) x^2)          # 1 4 9 16 25

Control Flow

# if / else
# `if` needs a single TRUE/FALSE, so `score` has to be one value here.
# It is defined explicitly: nothing earlier in the sheet creates a standalone
# `score` (only the df$score column), so without this line both statements
# below stop with "object 'score' not found".
score <- 85
grade <- if (score >= 90) {
  "A"
} else if (score >= 80) {
  "B"
} else {
  "C"
}

# ifelse() is the vectorised form: it returns one result per element of the
# test, so the same call also works when `score` is a whole vector.
ifelse(score >= 60, "pass", "fail")

# for loop: add the integers 1 through 10 into `total`
total <- 0
for (i in seq_len(10)) {
  total <- total + i
}

# while loop: print the counter, then advance it, until it passes 5
n <- 1
while (n <= 5) {
  cat(n, " ")
  n <- n + 1
}

# apply family — avoid loops!
# These calls use the data frame `df` built earlier in this sheet.
sapply(df$score, \(x) x / 100)          # one result per score, simplified to a vector
lapply(df[2:3], mean)       # returns list (one mean per selected column)
vapply(df[2:3], mean, numeric(1))       # like lapply, but each result must be one numeric value
tapply(df$score, df$pass, mean)         # mean score within each value of pass
mapply(function(a,b) a+b, 1:3, 10:12)   # walks both inputs in parallel: 11 13 15

Math & Statistics

x <- c(12, 7, 3, 15, 9, 6, 11)

mean(x)          # 9
median(x)        # 9
sd(x)            # 4.041... (square root of var(x))
var(x)           # 16.33... (sample variance: squared deviations sum to 98, / (n - 1) = 98 / 6)
sum(x)           # 63
range(x)         # 3 15
quantile(x, 0.25) # 6.5

min(x); max(x)   # 3 and 15
cumsum(x)        # running total: 12 19 22 37 46 52 63
diff(x)          # successive diffs: -5 -4 12 -6 -3 5
round(3.456, 1)  # 3.5
ceiling(3.2)     # 4
floor(3.8)       # 3
abs(-5)          # 5
sqrt(16)         # 4
log(100, 10)     # 2 (second argument is the base)

Strings

# Joining strings
paste("Hello", "World")       # "Hello World" (pieces joined with a space)
paste0("x", 1:3)              # "x1" "x2" "x3" (no separator)
paste(c("a","b"), collapse=",") # "a,b" (collapse merges a vector into one string)

# Inspecting and changing case
s <- "R is great"
nchar(s)                 # 10
substr(s, 1, 1)          # "R"
toupper(s)               # "R IS GREAT"
tolower(s)               # "r is great"

# Pattern matching and replacement
gsub("great", "fun", s)  # "R is fun"
sub("is", "was", s)      # first match only: "R was great"
grepl("great", s)        # TRUE
grep("r", c("r","R","x")) # 1 (index; matching is case-sensitive)
strsplit("a-b-c", "-")   # list: "a" "b" "c"
trimws("  hi  ")         # "hi"
sprintf("%.2f%%", 3.1)   # "3.10%" (%% prints a literal percent sign)