x = 36 for (x in seq(10)){ cat(x, "\t") # x is the iterator }
## 1 2 3 4 5 6 7 8 9 10
September 21, 2018
x = 36 for (x in seq(10)){ cat(x, "\t") # x is the iterator }
## 1 2 3 4 5 6 7 8 9 10
for (x in seq(10)){ x = 3.9*x*(1-x) cat(x, "\t") }
## 0 -7.8 -23.4 -46.8 -78 -117 -163.8 -218.4 -280.8 -351
a = 0 while(a<10){ a = a+1 ## unlike C, a++, a+=1 won't work in R cat(a, "\t") }
## 1 2 3 4 5 6 7 8 9 10
m <- matrix(data=cbind(rnorm(30, 0), rnorm(30, 2), rnorm(30, 5)), nrow=30, ncol=3) dim(m)
## [1] 30 3
apply(m, 2, mean)
## [1] -0.06959675 2.20471963 4.93081531
apply(m, 1, mean)
## [1] 3.122706 1.867839 2.622329 3.263160 1.930000 2.429607 2.278498 ## [8] 3.352178 2.346678 2.104584 2.281192 1.537102 1.592945 1.504417 ## [15] 2.594441 2.161014 1.841993 2.596635 2.607188 3.045947 1.933094 ## [22] 2.675230 2.966192 2.442753 2.247247 2.142100 2.055638 2.070515 ## [29] 2.551278 2.494883
apply(m, 2, function(x) length(x[x<0]))
## [1] 13 0 0
traversing over a set of data like a list or vector, and calling the specified function for each item.
Usage: lapply(alist, afunction) - applies afunction to all the elements of a list or vector, returns a list of the results. Example:
cap.state.name <- lapply(state.name, toupper) head(cap.state.name, n = 3)
## [[1]] ## [1] "ALABAMA" ## ## [[2]] ## [1] "ALASKA" ## ## [[3]] ## [1] "ARIZONA"
sapply(1:3, function(x) x^2)
## [1] 1 4 9
function_name <- function([arg1], [arg2], ...){ #function code body }
N = 1e5 d <- as.data.frame(cbind(rnorm(N),runif(N))) system.time(for (i in 1:N) { d$mean2[i] <- mean(c(d[i, 1], d[i, 2])) })
## user system elapsed ## 24.50 22.61 50.32
system.time(d$mean1 <- apply(d, 1, mean))
## user system elapsed ## 0.6 0.0 0.6
# computes the mean of a vector of numbers mean <- function(a_vector) { s <- sum(a_vector) x <- s/length(a_vector) }
# checks to see if a string s starts with letter x startsWith <- function(x, s){ if(x == substr(s, 0, 1){ return(TRUE) } return(FALSE) }
sort_vector <- function(a_vector, ascending=TRUE){ # sorting algorithm }
sort_vector(a_vector) # returns a_vector in ascending order sort_vector(a_vector, FALSE) # returns a vector in descending order
x <- 5 test <- function() { cat(x + 5) } test();
## 10
x <- 10 test <- function() { x <- 5; cat(x + 20) } test() #prints 25
## 25
cat(x + 20) #prints 30
## 30
local variable assignment takes precedence inside the function over the global assignment.
x <- 10 test <- function(z) { z <- z + 10 cat(z) } test(x) #prints 20
## 20
cat(x) #prints 10
## 10
system.time(expr)
Return CPU times that "expr" used. A numeric vector of length 5 containing the user cpu, system cpu, elapsed, subproc1, subproc2 times. The subproc times are the user and system cpu time used by child processes (and so are usually zero).
system.time(for(i in 1:5000) x<-mean(rnorm(1000)))
## user system elapsed ## 0.39 0.02 0.44
now<-proc.time() for(i in 1:5000) x<-mean(rnorm(1000)) proc.time()-now
## user system elapsed ## 0.40 0.00 0.44
x<-rnorm(1e7) sum<-0 now<-proc.time() for(i in 1:1e7){sum<-sum+x[i]} meanx<-sum/1e7 proc.time()-now
## user system elapsed ## 0.39 0.00 0.44
system.time(mean(x))
## user system elapsed ## 0.02 0.01 0.04
x = seq(1:1e4) #install.packages("microbenchmark") library(microbenchmark) microbenchmark(sqrt(x),x^(0.5))
## Unit: microseconds ## expr min lq mean median uq max neval ## sqrt(x) 64.396 66.7670 129.7510 71.5075 83.557 4328.689 100 ## x^(0.5) 426.667 443.4575 496.9725 467.3590 499.359 1378.370 100
Here are two other ways to compute the square root of a vector. Which do you think will be fastest? Which will be slowest? Use microbenchmarking to test your answers.
x ^ (1 / 2) exp(log(x) / 2)
Compare execution times for the different methods of solving the Least Squares problem when the number of observations is large.
Take \(n = 5 \times 10^5\).
set.seed(123) n <- 5e5; p <- 20 X <- matrix(rnorm(n * p, mean = 1 : p, sd = 10), nr = n, nc = p, byrow = TRUE) y <- rowSums(X) + rnorm(n)
system.time(bHat1 <- solve(t(X) %*% X) %*% t(X) %*% y)
## user system elapsed ## 0.36 0.04 0.41
crossprod
.system.time(bHat2 <- solve(crossprod(X), crossprod(X, y)))
## user system elapsed ## 0.19 0.00 0.19
p = 1000; A = array(rnorm(p*p), c(p,p)) A = crossprod(A) ptm <- proc.time(); B = solve(A); proc.time()-ptm
## user system elapsed ## 0.77 0.00 0.81
ptm <- proc.time(); B = chol2inv(A); proc.time()-ptm
## user system elapsed ## 0.14 0.00 0.15
R function combn
can be used to produce the indices corresponding to all \(\binom{6}{3}\) ways of choosing 3 elements out of 6 elements.
(idx = combn(x = 6, m=3))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] ## [1,] 1 1 1 1 1 1 1 1 1 1 2 2 2 ## [2,] 2 2 2 2 3 3 3 4 4 5 3 3 3 ## [3,] 3 4 5 6 4 5 6 5 6 6 4 5 6 ## [,14] [,15] [,16] [,17] [,18] [,19] [,20] ## [1,] 2 2 2 3 3 3 4 ## [2,] 4 4 5 4 4 5 5 ## [3,] 5 6 6 5 6 6 6
x=c(37,55,57) y=c(23,31,70) xy = c(x,y) # the combined data set permut = NULL # the permuted data set (a 20*6 matrix) for(i in 1:ncol(idx)){ permut = rbind(permut, c(xy[idx[,i]], xy[-idx[,i]])) } permut.x = permut[, 1:3] # the permuted X matrix (20*3) permut.y = permut[, 4:6] # the permuted Y matrix (20*3) head(permut.x, n = 3)
## [,1] [,2] [,3] ## [1,] 37 55 57 ## [2,] 37 55 23 ## [3,] 37 55 31
delta1 = apply(permut.x, 1, mean) - apply(permut.y, 1, mean) delta1.obs = mean(x)-mean(y)
P-value for permutation of sample mean
(pval1.upper = mean(delta1 >= delta1.obs)) #upper-tailed
## [1] 0.35
(pval1.2sided = mean(abs(delta1) >= abs(delta1.obs))) #two-tailed
## [1] 0.7
hist(delta1,breaks=10);abline(v = delta1.obs,col="red")
idx = combn(x = 6, m=3) x=c(37,55,57) y=c(23,31,70) xy = c(x,y) # the combined data set permut = NULL # the permuted data set (a 20*6 matrix) for(i in 1:20){ permut = rbind(permut, c(xy[idx[,i]], xy[-idx[,i]])) } permut.x = permut[, 1:3] # the permuted X matrix (20*3) permut.y = permut[, 4:6] # the permuted Y matrix (20*3) delta1 = apply(permut.x, 1, mean) - apply(permut.y, 1, mean) delta1.obs = mean(x)-mean(y) #pvalue for permutation of sample mean (pval1.upper = mean(delta1 >= delta1.obs)) #upper-tailed (pval1.2sided = mean(abs(delta1) >= abs(delta1.obs))) #two-tailed hist(delta1,breaks=10);abline(v = delta1.obs,col="red")
Carter and Hubert (1985) give data for percentage variation in blood sugar over 1-hour periods for rabbits given two different dose levels of a drug, Is there evidence of a response difference between levels? Apply a Permutation test.
dose.1 = c(0.21,16.20,10.10,8.67,11.13,1.96,10.19,15.87,12.81) dose.2 = c(1.59,2.66,6.27,2.32,10.87,7.23,3.76,3.02,15.01) boxplot(dose.1,dose.2)