Warning: not so basic stuff for non functional language fans 8-)
Check: + https://github.com/hadley/devtools/wiki/Functional-programming + http://adv-r.had.co.nz/Data-structures.html
func <- function(x) x%%2==0 # lambda expressions
func(4)
## [1] TRUE
(function(x)x%%2==0)(4)
## [1] TRUE
# Filter
Filter((function(x)x%%2==0),1:20)
## [1] 2 4 6 8 10 12 14 16 18 20
# Map
mapply((function(x)x*2),1:20)
## [1] 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40
# Fold (foldl by default, use right=T for foldr)
# use accumulate=T for scan
Reduce((function(x,acc)x+acc),1:10,0) # eg, vector sum
## [1] 55
Reduce((function(x,acc)x*acc),1:10,1) # eg, vector product
## [1] 3628800
Reduce((function(x,acc)x*acc),1:10,1,accumulate=T)
## [1] 1 1 2 6 24 120 720 5040
## [9] 40320 362880 3628800
# returns the 1st element that satisfies the predicate
Find((function(x)x%%2==0),20:1)
## [1] 20
# returns the index of the 1st element that satisfies the predicate
Position((function(x)x%%2==0),20:1)
## [1] 1
# these high-order functions work for every R function
formals(function(x=4)x+5)
## $x
## [1] 4
body(function(x=4)x+5)
## x + 5
environment(function(x=4)x+5)
## <environment: R_GlobalEnv>
# eg, apply sd to all columns of mtcars data frame, and then
# turn the resulting list into a vector
unlist(lapply(mtcars,sd))
## mpg cyl disp hp drat wt
## 6.0269481 1.7859216 123.9386938 68.5628685 0.5346787 0.9784574
## qsec vs am gear carb
## 1.7869432 0.5040161 0.4989909 0.7378041 1.6152000
ref: http://stackoverflow.com/questions/3505701/r-grouping-functions-sapply-vs-lapply-vs-apply-vs-tapply-vs-by-vs-aggrega
# Two dimensional matrix
M <- matrix(seq(1,16), 4, 4)
M
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
# apply min to rows
apply(M, 1, min)
## [1] 1 2 3 4
# apply max to columns
apply(M, 2, max)
## [1] 4 8 12 16
# apply double for each cell
apply(M, c(1,2), function(x) 2*x)
## [,1] [,2] [,3] [,4]
## [1,] 2 10 18 26
## [2,] 4 12 20 28
## [3,] 6 14 22 30
## [4,] 8 16 24 32
# 3 dimensional array
M <- array( seq(32), dim = c(4,4,2))
M
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 17 21 25 29
## [2,] 18 22 26 30
## [3,] 19 23 27 31
## [4,] 20 24 28 32
# Apply f across each M[*, , ] - i.e across 2nd and 3rd dimension
apply(M, 1, max)
## [1] 29 30 31 32
apply(M, 1, sum) # Result is one-dimensional
## [1] 120 128 136 144
# Apply sum across each M[*, *, ] - i.e Sum across 3rd dimension
apply(M, c(1,2), sum)
## [,1] [,2] [,3] [,4]
## [1,] 18 26 34 42
## [2,] 20 28 36 44
## [3,] 22 30 38 46
## [4,] 24 32 40 48
# Result is two-dimensional
f <- function(x) x^2
as.vector(lapply(1:6,f), mode="integer") # change list to vector just for tidy output
## [1] 1 4 9 16 25 36
sapply(1:10,f) # does the same thing
## [1] 1 4 9 16 25 36 49 64 81 100
matrix(sapply(1:25,f),5,5)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 36 121 256 441
## [2,] 4 49 144 289 484
## [3,] 9 64 169 324 529
## [4,] 16 81 196 361 576
## [5,] 25 100 225 400 625
add <- function(x, y) x + y
sapply(1:10, add, 3) # the 3 is passed to add() as its 2nd argument
## [1] 4 5 6 7 8 9 10 11 12 13
sapply(1:10, `+`, 3) # search object *called* as '+'
## [1] 4 5 6 7 8 9 10 11 12 13
sapply(1:10, "+", 3) # search object *named* as '+'
## [1] 4 5 6 7 8 9 10 11 12 13
x <- list(1:3, 4:9, 10:12)
sapply(x, "[", 2) # equivalent to sapply(x, function(x) x[2])
## [1] 2 5 11
An eg with list of functions:
summary <- function(x) {
funs <- c(mean, median, sd, mad, IQR)
lapply(funs, function(f) f(x, na.rm = TRUE))
}
summary(rnorm(100))
## [[1]]
## [1] 0.08504035
##
## [[2]]
## [1] -0.09771956
##
## [[3]]
## [1] 1.035898
##
## [[4]]
## [1] 1.042071
##
## [[5]]
## [1] 1.41872
f(runif(10))
## [1] 0.402364169 0.425024010 0.135703288 0.361465132 0.395346852
## [6] 0.001033643 0.485006278 0.286659677 0.097783390 0.036028147
replicate(3,f(runif(10))) # replicates 3 times the previous instruction
## [,1] [,2] [,3]
## [1,] 0.36552085 0.0311398468 0.06603426
## [2,] 0.07281059 0.3269985617 0.53624294
## [3,] 0.01150483 0.4394893929 0.04703448
## [4,] 0.32840718 0.0191090669 0.42680599
## [5,] 0.41376382 0.0009683344 0.01910608
## [6,] 0.37684437 0.3345655828 0.00405777
## [7,] 0.05967578 0.6927456121 0.17158436
## [8,] 0.64129500 0.0263523135 0.03000256
## [9,] 0.12957366 0.3716564637 0.92696523
## [10,] 0.88994004 0.1201024499 0.01843136
outer(1:5,1:3,"*")
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 2 4 6
## [3,] 3 6 9
## [4,] 4 8 12
## [5,] 5 10 15
1:5 %o% 1:3 # same thing
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 2 4 6
## [3,] 3 6 9
## [4,] 4 8 12
## [5,] 5 10 15
mapply(rep, 9:6, 1:4)
## [[1]]
## [1] 9
##
## [[2]]
## [1] 8 8
##
## [[3]]
## [1] 7 7 7
##
## [[4]]
## [1] 6 6 6 6
f1 <- function(x,y,z) 100*x+10*y+z
mapply(f1,1:3,4:6,7:9)
## [1] 147 258 369
Another important function is fold
which is vector reduction to a value by applying some associate function on a list (given an identity value for empty lists) :
# SOurce: Brian Rowe's "Modeling Data with Functional Programming in R"
fold <- function(xs, fn, acc, ...) {
sapply(xs, function(x) acc <<- fn(x, acc), ...)
acc
}
There are a lot of functions that can be defined this way:
my_sum <- function (xs) fold(xs, `+`, 0)
my_sum(1:4)
## [1] 10
my_sum(c())
## [1] 0
my_len <- function(xs) fold(xs, function(x,acc) 1+acc, 0)
my_len(c(0,1,2,3))
## [1] 4
When calling a function you can specify arguments by position, by complete name, or by partial name. Arguments are matched first by exact name (perfect matching), then by prefix matching and finally by position.
f <- function(abcdef, bcde1, bcde2) {
list(a = abcdef, b1 = bcde1, b2 = bcde2)
}
str(f(1, 2, 3))
## List of 3
## $ a : num 1
## $ b1: num 2
## $ b2: num 3
str(f(2, 3, abcdef = 1))
## List of 3
## $ a : num 1
## $ b1: num 2
## $ b2: num 3
# Can abbreviate long argument names:
str(f(2, 3, a = 1))
## List of 3
## $ a : num 1
## $ b1: num 2
## $ b2: num 3
# But this doesn't work because abbreviation is ambiguous
str(f(1, 3, b = 1))
## Error in f(1, 3, b = 1): argument 3 matches multiple formal arguments
Calling a function given a list of arguments
args <- list(1:10, na.rm = TRUE)
do.call(mean, args) # same as mean(1:10, na.rm = TRUE)
## [1] 5.5
R can check if an argument is missing:
f <- function(x,y) {
c(missing(x),missing(y))
}
f(x=1)
## [1] FALSE TRUE
f(y=2)
## [1] TRUE FALSE
f(,3)
## [1] TRUE FALSE
f(4,)
## [1] FALSE TRUE
Lazy Eval: R uses lazy evaluation when dealing with function arguments, it olny computes them if necessary
f <- function(x,y) {
x*2
}
f(4,stop("error!"))
## [1] 8
This might bring some subtle problems:
add <- function(x) {
function(y) x + y
}
adders <- lapply(1:10, add) # a list of functions
adders[[1]](5) # hmmm... (the last value of x in the vector cycle above is 10)
## [1] 6
adders[[10]](5) # ok
## [1] 15
# this is solved by forcing the evaluation of 'x' in each element of the vector cycle
add <- function(x) {
force(x)
function(y) x + y
}
adders2 <- lapply(1:10, add)
adders2[[1]](5)
## [1] 6
adders2[[10]](5)
## [1] 15
Default arguments are evaluated inside the function. This means that if the expression depends on the current environment the results will differ depending on whether you use the default value or explicitly provide one
f <- function(x = ls()) {
a <- 1
x
}
# ls() evaluated inside f:
f()
## [1] "a" "x"
# ls() evaluated in global environment:
f(ls())
## [1] "add" "adders" "adders2" "args" "f" "f1" "fold"
## [8] "func" "M" "my_len" "my_sum" "summary" "x"
More technically, an unevaluated argument is called a promise, or (less commonly) a thunk. A promise is made up of two parts:
- the expression which gives rise to the delayed computation. It can be accessed with substitute()
- the environment where the expression was created and where it should be evaluated
The first time a promise is accessed the expression is evaluated in the environment where it was created. This value is cached, so that subsequent access to the evaluated promise does not recompute the value (but the original expression is still associated with the value, so substitute can continue to access it). ref
substitute(expression(a + b), list(a = 1))
## expression(1 + b)
The special argument ...
passes all non-matched args to the inner functions
f <- function(...) {
names(list(...))
}
f(a = 1, b = 2)
## [1] "a" "b"
f <- function(x, y, ...) {
g <- function(z, w=1) {
x*1000+y*100+z*10+w
}
}
f1 <- f(1,2,z=3)
f1(4)
## [1] 1241
f1(w=4,z=5)
## [1] 1254
Infix Functions: use %name% to enclose the function name
"%+%" <- function(a, b) paste(a, b, sep = "")
"new" %+% " string"
## [1] "new string"
`%+%`("new", " string") # alternative call
## [1] "new string"
`+`(1, 5)
## [1] 6
# use \ for special chars
"%/\\%" <- function(a, b) paste(a, b)
"a" %/\% "b"
## [1] "a b"
An eg that creates a Matlab-like DSL for matrix descriptions:
qm<-function(...)
{
# turn ... into string
args<-deparse(substitute(rbind(cbind(...))))
# create "rbind(cbind(.),cbind(.),.)" construct
args<-gsub("\\|","),cbind(",args)
# eval
eval(parse(text=args))
}
M<-N<-diag(2)
qm(M,c(4,5) | c(1,2),N | t(1:3))
## [,1] [,2] [,3]
## [1,] 1 0 4
## [2,] 0 1 5
## [3,] 1 1 0
## [4,] 2 0 1
## [5,] 1 2 3
An object is data with functions. A closure is a function with data. – John D Cook
# returns a new function which as access to the environment
# variable 'exponent'
power <- function(exponent) {
function(x) x ^ exponent
}
square <- power(2)
square(2)
## [1] 4
square(4)
## [1] 16
cube <- power(3)
cube(2)
## [1] 8
cube(4)
## [1] 64
as.list(environment(square)) # shows the closure's environment
## $exponent
## [1] 2
# Closures are useful for making function factories,
missing_remover <- function(na) {
function(x) {
x[x == na] <- NA
x
}
}
remove_99 <- missing_remover(99)
remove_99(c(99,100,101,99,98))
## [1] NA 100 101 NA 98
remove_dot <- missing_remover(".")
remove_dot(c(".","a",".","b"))
## [1] NA "a" NA "b"
# And are one way to manage mutable state in R.
new_counter <- function() {
i <- 0
function() {
i <<- i + 1 # operator '<<-' searches for 'i' in the parent environment
i
}
}
counter_one <- new_counter()
counter_two <- new_counter()
counter_one()
## [1] 1
counter_one()
## [1] 2
counter_two()
## [1] 1
as.list(environment(counter_one)) # check its mutable state
## $i
## [1] 2
as.list(environment(counter_two))
## $i
## [1] 1
currying is the technique of transforming a function that takes multiple arguments (or a tuple of arguments) in such a way that it can be called as a chain of functions, each with a single argument (partial application) – Wikipedia
# list of functions and currying
#eg, mean functions
compute_mean <- list(
base = function(x) mean(x),
sum = function(x) sum(x) / length(x),
manual = function(x) {
total <- 0
n <- length(x)
for (i in seq_along(x)) {
total <- total + x[i] / n
}
total
}
)
xs <- runif(1e5)
system.time(compute_mean$base(xs))
## user system elapsed
## 0 0 0
system.time(compute_mean$sum(xs))
## user system elapsed
## 0 0 0
system.time(compute_mean$manual(xs))
## user system elapsed
## 0.07 0.00 0.08
# or test all in one line
lapply(compute_mean, function(f) system.time(f(xs)))
## $base
## user system elapsed
## 0 0 0
##
## $sum
## user system elapsed
## 0 0 0
##
## $manual
## user system elapsed
## 0.08 0.00 0.07
Map(function(f) system.time(f(xs)), compute_mean)
## $base
## user system elapsed
## 0 0 0
##
## $sum
## user system elapsed
## 0 0 0
##
## $manual
## user system elapsed
## 0.08 0.00 0.08
# another way
call_fun <- function(f, ...) f(...)
timer <- function(f) {
force(f) # force the evaluation of expression
function(...) system.time(f(...))
}
timers <- lapply(compute_mean, timer) # return a list of functions
lapply(timers, call_fun, xs)
## $base
## user system elapsed
## 0 0 0
##
## $sum
## user system elapsed
## 0 0 0
##
## $manual
## user system elapsed
## 0.08 0.00 0.08
# implementation of currying:
Curry <- function(FUN,...) {
.orig <- list(...)
function(...) {
do.call(FUN, c(.orig, list(...)))
}
}
add <- function(x, y) x + y
addOne <- Curry(add, y = 1)
addOne(4) # 5
## [1] 5
# using curry in interesting ways:
funs <- list(
sum = sum,
mean = mean,
median = median
)
# now turn that list elements, into functions that remove NAs
funs2 <- lapply(funs, Curry, na.rm = TRUE)
Package pryr
implements currying with partial()
# library(devtools)
# install_github("pryr")
library(pryr)
##
## Attaching package: 'pryr'
##
## The following object is masked _by_ '.GlobalEnv':
##
## f
f <- function(x,y) 10*x+y
f(5,6)
## [1] 56
f1 <- partial(f, x=5)
f1(6)
## [1] 56
f2 <- partial(f, y=6)
f2(5)
## [1] 56
Function operators (FO) are functions that take one (or more) functions as input and return a function as output.
we’ll explore four types of function operators (FOs):
Behavioural FOs. While leaving the function otherwise unchanged, this type can do things like automatically log when the function is run, ensure that a function is run only once, and delay the operation of a function.
Output FOs. This type can return different values depending on whether a function throws an error, or negates the result of a logical predicate.
Input FOs. This type can modify inputs like partially evaluating a function, convert a function that takes multiple arguments to one that takes a list, or automatically vectorise a function.
Combining FOs. This type can combine the results of predicate functions with boolean operators, or compose multiple function calls.
Behavioural FOs leave the inputs and outputs of a function unchanged, but adds some extra behaviour.
# add a delay to a function call:
delay_by <- function(delay, f) {
function(...) {
Sys.sleep(delay)
f(...)
}
}
system.time(runif(100))
## user system elapsed
## 0 0 0
system.time(delay_by(1, runif)(100))
## user system elapsed
## 0.00 0.00 1.01
# add a dot every 10 processing units
dot_every <- function(n, f) {
i <- 1
function(...) {
if (i %% n == 0) cat(".")
i <<- i + 1
f(...)
}
}
x <- lapply(1:100, runif)
x <- lapply(1:100, dot_every(10, runif))
## ..........
fib <- function(n) {
if (n < 2)
return(1)
fib(n - 2) + fib(n - 1)
}
system.time(fib(28))
## user system elapsed
## 1.63 0.00 1.64
###### MEMOISE IT!
library(memoise)
fib2 <- memoise(
function(n) {
if (n < 2)
return(1)
fib2(n - 2) + fib2(n - 1)
}
)
system.time(fib2(28))
## user system elapsed
## 0.02 0.00 0.05
One challenge with functionals is that it can be hard to see what’s going on inside. It’s not easy to pry open their internals like it is with a for loop. However, we can use FOs to help us. The tee function, defined below, has three arguments, all functions: f, the original function; on_input, a function that’s called with the inputs to f, and on_output a function that’s called with the output from f.
ignore <- function(...) NULL
tee <- function(f, on_input = ignore, on_output = ignore) {
function(...) {
input <- if (nargs() == 1) c(...) else list(...)
on_input(input)
output <- f(...)
on_output(output)
output
}
}
g <- function(x) cos(x) - x
uniroot(g, c(-5, 5))
## $root
## [1] 0.7390853
##
## $f.root
## [1] -2.603993e-07
##
## $iter
## [1] 6
##
## $init.it
## [1] NA
##
## $estim.prec
## [1] 6.103516e-05
uniroot(tee(g, on_input = print), c(-5, 5))
## [1] -5
## [1] 5
## [1] 0.2836622
## [1] 0.8752034
## [1] 0.7229804
## [1] 0.7386309
## [1] 0.7390853
## [1] 0.7390243
## [1] 0.7390853
## $root
## [1] 0.7390853
##
## $f.root
## [1] -2.603993e-07
##
## $iter
## [1] 6
##
## $init.it
## [1] NA
##
## $estim.prec
## [1] 6.103516e-05
uniroot(tee(g, on_output = print), c(-5, 5))
## [1] 5.283662
## [1] -4.716338
## [1] 0.6763747
## [1] -0.2343627
## [1] 0.02685676
## [1] 0.0007601196
## [1] -2.603993e-07
## [1] 0.0001018874
## [1] -2.603993e-07
## $root
## [1] 0.7390853
##
## $f.root
## [1] -2.603993e-07
##
## $iter
## [1] 6
##
## $init.it
## [1] NA
##
## $estim.prec
## [1] 6.103516e-05
How to modify the output of a function.
Negate <- function(f) { # Negates the function output
function(...) !f(...)
}
(Negate(is.null))(NULL)
## [1] FALSE
# removes all null elements from a list
compact <- function(x) Filter(Negate(is.null), x)
compact(c(NULL,3,3))
## [1] 3 3
# failwith() turns a function that throws an error into a function that returns a default value when there's an error
failwith <- function(default = NULL, f, quiet = TRUE) {
function(...) {
out <- default
try(out <- f(...), silent = quiet) # silent a True does not show error msg
out
}
}
log("a")
## Error in log("a"): non-numeric argument to mathematical function
failwith(NA, log)("a")
## [1] NA
Function composition
An important way of combining functions is through composition: f(g(x)).
compose <- function(f, g) {
function(...) f(g(...))
}
"%.%" <- compose
sqrt(3*4)
## [1] 3.464102
(sqrt %.% `*`)(3,4)
## [1] 3.464102
# function operators that combine logical predicates:
and <- function(f1, f2) {
function(...) {
f1(...) && f2(...)
}
}
or <- function(f1, f2) {
function(...) {
f1(...) || f2(...)
}
}
not <- function(f1) {
function(...) {
!f1(...)
}
}
# So something like:
data <- Filter(function(x) is.character(x) || is.factor(x), iris)
# becomes
data <- Filter(or(is.character, is.factor), iris)
A note about lazy eval:
wrap <- function(f) {
function(...) f(...)
}
fs <- list(sum = sum, mean = mean, min = min)
gs <- lapply(fs, wrap)
gs$sum(1:10) # bug, it's returning the minimum
## [1] 55
environment(gs$sum)$f
## function (..., na.rm = FALSE) .Primitive("sum")
It doesn’t work well with lapply() because f is lazily evaluated. This means that if you give lapply() a list of functions and a FO to apply those functions, it will look like it repeatedly applied the last function.
Another problem is that as designed, we have to pass a function object, rather than the name of a function, which is often more convenient. We can solve both problems by using match.fun(): it forces evaluation of f, and will find the function object if given its name:
wrap2 <- function(f) {
f <- match.fun(f)
function(...) f(...)
}
fs <- c(sum = "sum", mean = "mean", min = "min")
hs <- lapply(fs, wrap2)
hs$sum(1:10)
## [1] 55
environment(hs$sum)$f
## function (..., na.rm = FALSE) .Primitive("sum")