# par is used to specify global graphics parameters
# adn affects of subsequent graph functions
par(pch=1) # defines the plotting symbol
par(lty=2) # defines the line type
par(lwd=1) # line's width
par(col="black") # plotting color
par(bg="white") # background color
par(mfrow=c(2,2)) # number of plots/(row,column) (plots filled row-wise)
par(mfcol=c(2,2)) # number of plots/(row,column) (plots filled col-wise)
par(mfcol=c(1,1)) # default
# ?par check more options
par("mar") # check the current value of a given attribute, in this case the margins' size
xs <- seq(-2,2,.05)
plot(xs,dnorm(xs),xlab="",ylab="", pch=20)
lines(c(-2,2),c(.05,.4), col="green", lwd=2) # add a line to the current plot
points(seq(-2,2,length.out=10),rep(.2,10),pch=20, col="blue") # add points to the plot
text(-1,.1,"extra text") # add labels to the plot
title(main="title",sub="subtitle",xlab="x-axis",ylab="y-axis") # add annotations to plot
legend("topleft",c("a label","other label"), col=1:2, pch=20) # add a legend
axis(4) # add axis to the other edges
mtext("more text",3) # (1=bottom, 2=left, 3=top, 4=right).
x <- seq(0,1,.025)
plot(x,dbeta(x,4,2), type="l") # unite points using a line
curve(dbeta(x,2,4),from=0,to=1,type="l",col="red",add=TRUE) # add a new curve to the plot
abline(h=seq(0,2,.25),v=seq(0,1,.2),col="grey") # add a grid
# making several plots at once
xs <- seq(0,1,.05)
parameters <- c(.5,1,2,5)
par(mfrow=c(length(parameters),length(parameters)))
for(a in parameters) {
for(b in parameters) {
plot(xs,dbeta(xs,a,b), type="l", col="red",
xlab=paste("beta(",a,",",b,")"), ylab="density")
}
}
x <- rnorm(100)
y <- x + rnorm(100)
plot(x,y,type="n") # plots everything except the data
#let's say that the first half of (x,y) are males, and the last half are females
#define levels for that
gender = gl(2,50,labels=c("male","female"))
#plot male points in blue
points(x[gender=="male"], y[gender=="male"], col="blue")
#plot male points in red
points(x[gender=="female"], y[gender=="female"], col="red")
abline(lm(x~y),col="purple")
# points can have size too:
set.seed(4321)
n.trees <- 100
tree.data <- data.frame(size = floor(rexp(n.trees, 1/50)),
value = rnorm(n.trees,1000,55),
age = floor(rexp(n.trees, 1/50)))
tree.data$weight <- tree.data$age/50 * floor(rexp(n.trees, 1/150))
head(tree.data)
## size value age weight
## 1 51 1002.3564 40 149.60
## 2 1 967.6719 4 2.96
## 3 26 942.8321 47 109.98
## 4 25 1008.5532 16 14.08
## 5 29 991.2168 62 219.48
## 6 15 1102.1071 36 18.72
plot(tree.data$value, tree.data$weight, cex=log(tree.data$size+1),
xlab="dollars", ylab="tons",main="Size of trees defined by point area")
# we can also divide trees by age assigning different colors
f <- function(age) {
if (age<50)
return ("YOUNG")
else if (age<90)
return ("ADULT")
else
return ("OLD")
}
tree.data$age.cat <- Map(f,tree.data$age)
head(tree.data)
## size value age weight age.cat
## 1 51 1002.3564 40 149.60 YOUNG
## 2 1 967.6719 4 2.96 YOUNG
## 3 26 942.8321 47 109.98 YOUNG
## 4 25 1008.5532 16 14.08 YOUNG
## 5 29 991.2168 62 219.48 ADULT
## 6 15 1102.1071 36 18.72 YOUNG
plot(tree.data$value, tree.data$weight,
xlab="dollars", ylab="tons", type="n",main="Size of trees defined by point area")
colors <- c("blue","green","red")
labels <- c("YOUNG","ADULT","OLD")
for(i in 1:3 ) {
points(tree.data$value[tree.data$age.cat==labels[i]],
tree.data$weight[tree.data$age.cat==labels[i]],
cex=log(tree.data$size[tree.data$age.cat==labels[i]]+1), col=colors[i])
}
legend("topleft", tolower(labels), col = colors, pch = 20) # add a legend
How to present and compute areas for a range of, say, a normal distribution
mean <- 0 # mean parameter
sd <- 1 # sd parameter
lb <- -1.5 # lower bound
ub <- 1.25 # upper bound
x <- seq(-4, 4, length=101)
hx <- dnorm(x, mean, sd)
plot(x, hx, type="n", xlab="y", ylab="Density", main="Normal Distribution")
lines(x, hx)
# polygon draws the polygons whose vertices are given in x and y
i <- x >= lb & x <= ub
polygon(c(lb, lb, x[i], ub, ub),
c(0,dnorm(lb, mean, sd),hx[i],dnorm(ub, mean, sd),0), col="red")
area <- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
result <- paste("P(",lb,"< Y <",ub,") =", signif(area, digits=3))
# place label on top of graph
mtext(result,3)
# Another eg:
lambda <- 1/20 # sd parameter
lb <- 0 # lower bound
ub <- 60 # upper bound
x <- seq(lb,ub*1.5,length=100)
hx <- dexp(x,lambda)
plot(x, hx, type="n", xlab="y", ylab="Density")
lines(x, hx)
# polygon draws the polygons whose vertices are given in x and y
i <- x >= lb & x <= ub
polygon(c(lb,x[i],ub), c(0,hx[i],0), col="red")
area <- pexp(ub, 1/20) - pexp(lb, 1/20)
result <- paste("P(",lb,"< Y <",ub,") =", signif(area, digits=6))
# place label on top of graph
mtext(result,3)
title(bquote(paste("Exponential Distribution with ", lambda, "= 1/20")))
Using a gradient [ref]:
x <- seq(-3,3,0.01)
y1 <- dnorm(x,0,1)
y2 <- 0.5*dnorm(x,0,1)
x.shade <- seq(-2,1,0.01)
plot(x,y1,type="l",bty="L",xlab="X",ylab="dnorm(X)")
points(x,y2,type="l",col="gray")
l <- length(x.shade)
color <- heat.colors(l)
for (i in 1:l) {
polygon(c(x.shade[i],rev(x.shade[i])),
c(dnorm(x.shade[i],0,1),0.5*dnorm(rev(x.shade[i]),0,1)),
border=color[i],col=NA)
}
Histogram shows the distribution of observations.
normal.data <- rnorm(200,mean=2,sd=1)
hist(normal.data, breaks=25, freq=FALSE) # freq decides between frequency vs. density
rug(normal.data) # shows actual data points
# A density plot captures the distribution shape by smoothing the histogram. You can specify the amount/type of smoothing.
lines(density(normal.data,bw=.1),col="black",lwd=2) # bw defines the density smoothness
lines(density(normal.data),col="red",lwd=2)
lines(density(normal.data,bw=.5),col="blue",lwd=2)
boxplot(normal.data)
# Let's make some fake data:
some.data <- data.frame(name=sample(letters,26),
age=signif(rnorm(26,45,10),2))
some.data$salary=(some.data$age/45)*signif(runif(26,1000,2000),4)
some.data$profession=sample(gl(2, 13, labels = c("Guard", "Driver")),26)
head(some.data)
## name age salary profession
## 1 e 52 1192.533 Driver
## 2 h 45 1062.000 Guard
## 3 i 33 1108.067 Driver
## 4 u 54 1501.200 Guard
## 5 d 48 1895.467 Driver
## 6 g 40 1456.889 Driver
# we can boxplot this stuff divided by a category, say profession:
boxplot(some.data$salary ~ some.data$profession)
# we can try do the same thing with age, but we need to cut some intervals first
c1 <- cut(some.data$age, breaks = seq(25,65,by=10))
table(c1)
## c1
## (25,35] (35,45] (45,55] (55,65]
## 6 9 7 3
some.data$age.interval <- c1
boxplot(some.data$salary ~ some.data$age.interval, xlab="ages", ylab="wages")
size <- 200
another.data <- data.frame(id=sample(1:size),
value=sample(seq(100,900,by=100),size,rep=TRUE))
head(another.data)
## id value
## 1 93 200
## 2 192 100
## 3 65 800
## 4 25 200
## 5 117 500
## 6 44 100
# to show a histogram of the frequency of values, we use barplot
barplot(table(another.data$value),xlab="frequency", ylab="value",
horiz=TRUE,xlim=c(0,5+max(table(another.data$value))))
# another way to show it
records <- matrix(c(2000:2019,sample(1:100,20)), nrow=2, byrow=TRUE)
bp <- barplot(records[2,], names.arg=records[1,], las=3, ylim=c(0,max(records[2,])+20))
text(bp, records[2,], labels = records[2,], pos = 3, cex=.7)
plot(1:5,1:5,pch=19,xlab="",
main=expression(
paste(
'exp',
bgroup("[", frac(- n * bar(x) ** 2, 2 * sigma ** 2),"]")) <= c ),
sub=expression(
paste(
"1 - ",
Phi,
bgroup("[", c + frac(mu[0]-mu, lambda/sqrt(n)), "]"),
sep = ""))
)
rotate <- (-45) # parameters used for 3D plots
tilt <- 30
parallelness <- 5.0
shadeval <- 0.05
perspcex <- 0.7
ncontours <- 12
xs <- seq(0,1,.025)
ys <- seq(0,1,.025)
f <- function(x,y) {
dnorm(x,.5,.25) + dnorm(y,.5,.125)
}
zs <- outer(xs,ys,f) # if f cannot be directly applied to 'outer' use Vectorize(f) instead
persp(xs , ys , zs,
xlab="x" , ylab="y" , zlab="f(x,y)" ,
main="Main Title" , cex=perspcex, lwd=0.1 ,
xlim=c(0,1) , ylim=c(0,1) , zlim=c(-1,4),
theta=rotate , phi=tilt , d=parallelness ,
shade=shadeval)
## Warning in persp.default(xs, ys, zs, xlab = "x", ylab = "y", zlab =
## "f(x,y)", : surface extends beyond the box
# Also the same function in a contour map:
contour(xs , ys , zs,
main=bquote(" "), levels=signif(seq(0,4,length=ncontours),3) ,
drawlabels=FALSE ,xlab="x" , ylab="y" )
# other ways to 3D plot:
library(aws)
## Loading required package: awsMethods
##
## Use the function setCores() to change the number of CPU cores.
## Loading required package: gsl
library(lattice)
xs <- rnorm(100000,0,1)
ys <- rpois(100000,3)
pts <- cbind(xs,ys)
delta <- 30
bins <- binning(pts,NULL,nbins=c(delta,delta)) # bin data into little squares
freqs <- matrix(bins$table.freq,delta,delta) # convert table into matrix
plot(wireframe(freqs, zoom = .9, lwd = 0.01,
xlab="x",ylab="y",zlab="z",
scales=list(arrows=FALSE,distance=1,tick.number=8,draw=TRUE),
screen = list(z = 75, x = -70, y = 3), # screen rotation
drape = TRUE, colorkey = FALSE))
# the following eg shows the graph of a given 2D function
require(lattice)
x <- seq(-pi, pi, len = 20)
y <- seq(-pi, pi, len = 20)
g <- expand.grid(x = x, y = y)
g$z <- sin(sqrt(g$x^2 + g$y^2)) # insert funtion here
print(wireframe(z ~ x * y, g, drape = TRUE,
aspect = c(3,1), colorkey = TRUE))
# to draw 3D histograms, execute the next two functions:
library(rgl)
binplot.3d <- function(x,y,z,alpha=1,topcol="#ff0000",sidecol="#aaaaaa") {
save <- par3d(skipRedraw=TRUE)
on.exit(par3d(save))
x1<-c(rep(c(x[1],x[2],x[2],x[1]),3),rep(x[1],4),rep(x[2],4))
z1<-c(rep(0,4),rep(c(0,0,z,z),4))
y1<-c(y[1],y[1],y[2],y[2],rep(y[1],4),rep(y[2],4),rep(c(y[1],y[2],y[2],y[1]),2))
x2<-c(rep(c(x[1],x[1],x[2],x[2]),2),rep(c(x[1],x[2],rep(x[1],3),rep(x[2],3)),2))
z2<-c(rep(c(0,z),4),rep(0,8),rep(z,8) )
y2<-c(rep(y[1],4),rep(y[2],4),rep(c(rep(y[1],3),rep(y[2],3),y[1],y[2]),2) )
rgl.quads(x1,z1,y1,col=rep(sidecol,each=4),alpha=alpha)
rgl.quads(c(x[1],x[2],x[2],x[1]),rep(z,4),c(y[1],y[1],y[2],y[2]),
col=rep(topcol,each=4),alpha=1)
rgl.lines(x2,z2,y2,col="#000000")
}
hist3d <- function(x,y=NULL,nclass="auto",alpha=1,col="#ff0000",scale=10) {
save <- par3d(skipRedraw=TRUE)
on.exit(par3d(save))
xy <- xy.coords(x,y)
x <- xy$x
y <- xy$y
n<-length(x)
if (nclass == "auto") { nclass<-ceiling(sqrt(nclass.Sturges(x))) }
breaks.x <- seq(min(x),max(x),length=(nclass+1))
breaks.y <- seq(min(y),max(y),length=(nclass+1))
z<-matrix(0,(nclass),(nclass))
for (i in 1:nclass)
{
for (j in 1:nclass)
{
z[i, j] <- (1/n)*sum(x < breaks.x[i+1] & y < breaks.y[j+1] &
x >= breaks.x[i] & y >= breaks.y[j])
binplot.3d(c(breaks.x[i],breaks.x[i+1]),c(breaks.y[j],breaks.y[j+1]),
scale*z[i,j],alpha=alpha,topcol=col)
}
}
}
# and then try it. Notice that a new graphic window will open, which can be manipulated by the mouse
#eg of use:
rho <- .9
std <- sqrt(1-rho^2)
nsim <- 10000
X <- c(rnorm(1), rep(NA,nsim-1))
Y <- c(rnorm(1), rep(NA,nsim-1))
for(i in 2:nsim) {
X[i] <- rnorm(1, rho*Y[i-1], std)
Y[i] <- rnorm(1, rho*X[i], std)
}
# draw 3d histogram
hist3d(X,Y,nclass=30,col="cyan",scale=100)
cf. stackoverflow
set.seed(101)
x <- seq(-2,2,length.out=20)
df <- data.frame(x = x,
y = x + rnorm(20,0,1))
plot(y ~ x, data = df)
# model
mod <- lm(y ~ x, data = df)
# predicts + interval
newx <- seq(min(df$x), max(df$x), length.out=100)
preds <- predict(mod, newdata = data.frame(x=newx),
interval = 'confidence')
# plot
plot(y ~ x, data = df, type = 'n')
# add fill
polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)
# model
abline(mod)
# intervals
lines(newx, preds[ ,3], lty = 'dashed', col = 'red')
lines(newx, preds[ ,2], lty = 'dashed', col = 'red')