Refs:
Hadley Wickham - ggplot2, Elegant Graphics for Data Analysis
Winston Chang - R Graphics Cookbook
ggplot2 implements Wilkinson grammar of graphics that describes and conceptually organizes the features that underlie statistical graphics.
The most important concepts are:
The data (duh!)
Aesthetic mappings describing how the data is mapped to aesthetic attributes (eg, shape, color, horizontal and vertical position)
Geometric objects, geoms, that we see on the graphic (eg, points, lines)
Statistical mappings, stats, summarizing the data (eg, binning, counting)
Scaling mappings, scales, that map the data space into aesthetic space (eg, linear or log axis, legends, how to map colors); there should be a scale for each aesthetic
Coordinate systems, coord, a map from the data coordinates to the graph coordinates (eg, cartesian, log-log, polar)
Facets describing how to split the data into subsets
library(ggplot2)
library(gridExtra) # for presenting plots side by side
# fix the RNG seed so the sampled rows are reproducible
set.seed(101)
d <- diamonds[sample(nrow(diamonds), 100), ] # draw a random sample of 100 diamonds for the next examples
head(d)
FALSE # A tibble: 6 x 10
FALSE carat cut color clarity depth table price x y z
FALSE <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
FALSE 1 1 Very Good I SI2 62.4 63 3276 6.44 6.35 3.99
FALSE 2 0.5 Ideal H VS1 61.6 58 1384 5.08 5.11 3.14
FALSE 3 1.41 Ideal J VS1 61.6 56 8275 7.19 7.22 4.44
FALSE 4 1.24 Premium D VS2 59.3 58 9916 7.09 7.03 4.19
FALSE 5 0.4 Very Good E VS2 61.2 60 912 4.79 4.68 2.9
FALSE 6 0.6 Good G VS1 60.1 61 1757 5.44 5.5 3.29
qplot
qplot
is ggplot2’s plot function.
plot1 <- qplot(carat, price, data = d)
plot2 <- qplot(log(carat), log(price), data = d)
plot3 <- qplot(carat, x * y * z, data = d) # x*y*z gives the volume
grid.arrange(plot1, plot2, plot3, ncol=3)
qplot
automates some aesthetics like how to assign colors and shapes to data:
plot1 <- qplot(carat, price, data = d, colour = color, size= carat)
plot2 <- qplot(carat, price, data = d, shape = cut)
grid.arrange(plot1, plot2, ncol=2)
## Warning: Using shapes for an ordinal variable is not advised
Notice that the plots also come with a legend. For each aesthetic attribute there is a scale function mapping the data values to the aesthetic values. Eg, in the left plot, the attribute D was associated with red.
qplot
accepts different types of geometric objects, geoms, which will make it produce different types of graphics. The default is geom="point"
, ie, the scatterplots we’ve seen. Other objects are possible:
plot1 <- qplot(carat, price, data = d, geom=c("point", "smooth")) # default smooth by loess regression
plot2 <- qplot(carat, price, data = d, geom=c("point", "boxplot"))
plot3 <- qplot(carat, price, data = d, geom=c("point", "line"))
grid.arrange(plot1, plot2, plot3, ncol=3)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?
Geoms for 1D data:
plot1 <- qplot(carat, data = d, geom="histogram") # continuous values
plot2 <- qplot(color, data = d, geom="bar") # discrete values
grid.arrange(plot1, plot2, ncol=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plot1 <- qplot(carat, data = d, geom="freqpoly")
plot2 <- qplot(carat, data = d, geom="density")
grid.arrange(plot1, plot2, ncol=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The smooth geom can be used with different regression methods:
# qplot() does not forward `method`/`formula` to the smooth geom (they are
# reported as unknown parameters and ignored), so the smoother is added as an
# explicit layer where those arguments are honoured.
plot1 <- qplot(carat, price, data = d) +
  geom_smooth(method = "lm", formula = y ~ x)
plot2 <- qplot(carat, price, data = d) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 3)) # polynomial regression
grid.arrange(plot1, plot2, ncol = 2)
# As above, the regression method must be given to geom_smooth() directly;
# passing it through qplot() is ignored.
library(splines) # using natural splines
plot3 <- qplot(carat, price, data = d) +
  geom_smooth(method = "lm", formula = y ~ ns(x, 5))
library(MASS) # for robust regression
plot4 <- qplot(carat, price, data = d) +
  geom_smooth(method = "rlm", formula = y ~ x)
grid.arrange(plot3, plot4, ncol = 2)
Sometimes there are too many data points and a direct plot is unable to convey an appropriate perspective of the data. One tool is to jitter the points (add small random noise so that many identical data points are spread around their center) and/or define an amount of opacity, ie, stating how many points must overlap in an area before it is plotted without transparency.
# Compare the raw scatterplot with a semi-transparent one on the full dataset.
plot1 <- qplot(carat, price, data = diamonds)
plot2 <- qplot(carat, price, data = diamonds, alpha=I(1/50)) # alpha = 1/50: 50 overlapping points give total opacity
grid.arrange(plot1, plot2, ncol=2)
plot1 <- qplot(color, price/carat, data = diamonds)
plot2 <- qplot(color, price/carat, data = diamonds, geom = "jitter")
plot3 <- qplot(color, price/carat, data = diamonds, geom = "jitter", alpha = I(1/10))
grid.arrange(plot1, plot2, plot3, ncol=3)
For the histogram geom we can define the bin size:
plot1 <- qplot(carat, data = diamonds, geom = "histogram", binwidth = 1)
plot2 <- qplot(carat, data = diamonds, geom = "histogram", binwidth = 0.1)
plot3 <- qplot(carat, data = diamonds, geom = "histogram", binwidth = 0.05)
grid.arrange(plot1, plot2, plot3, ncol=3)
For density plots we have the adjust parameter:
plot1 <- qplot(carat, data = diamonds, geom = "density", adjust = 4)
plot2 <- qplot(carat, data = diamonds, geom = "density", adjust = 1)
plot3 <- qplot(carat, data = diamonds, geom = "density", adjust = 0.5)
grid.arrange(plot1, plot2, plot3, ncol=3)
Applying a color aesthetic:
plot1 <- qplot(carat, data = diamonds, geom = "histogram", fill = color)
plot2 <- qplot(carat, data = diamonds, geom = "density", colour = color)
grid.arrange(plot1, plot2, ncol=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
head(economics)
FALSE # A tibble: 6 x 6
FALSE date pce pop psavert uempmed unemploy
FALSE <date> <dbl> <dbl> <dbl> <dbl> <dbl>
FALSE 1 1967-07-01 507. 198712 12.6 4.5 2944
FALSE 2 1967-08-01 510. 198911 12.6 4.7 2945
FALSE 3 1967-09-01 516. 199113 11.9 4.6 2958
FALSE 4 1967-10-01 512. 199311 12.9 4.9 3143
FALSE 5 1967-11-01 517. 199498 12.8 4.7 3066
FALSE 6 1967-12-01 525. 199657 11.8 4.8 3018
# Extract the calendar year from a date-like value.
# POSIXlt stores the year as an offset from 1900, hence the correction.
year <- function(x) {
  broken_down <- as.POSIXlt(x)
  broken_down$year + 1900
}
economics$year <- year(economics$date)
head(economics)
FALSE # A tibble: 6 x 7
FALSE date pce pop psavert uempmed unemploy year
FALSE <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
FALSE 1 1967-07-01 507. 198712 12.6 4.5 2944 1967
FALSE 2 1967-08-01 510. 198911 12.6 4.7 2945 1967
FALSE 3 1967-09-01 516. 199113 11.9 4.6 2958 1967
FALSE 4 1967-10-01 512. 199311 12.9 4.9 3143 1967
FALSE 5 1967-11-01 517. 199498 12.8 4.7 3066 1967
FALSE 6 1967-12-01 525. 199657 11.8 4.8 3018 1967
We can use geom line for standard time series:
plot1 <- qplot(date, unemploy / pop, data = economics, geom = "line")
plot2 <- qplot(date, uempmed, data = economics, geom = "line")
grid.arrange(plot1, plot2, ncol=2)
The geom path joins points that are adjacent in time (useful when a scatterplot does not provide enough information):
plot1 <- qplot(unemploy/pop, uempmed, data = economics, geom = c("point", "path"))
plot2 <- qplot(unemploy/pop, uempmed, data = economics, geom = c("point", "path"), color=year)
grid.arrange(plot1, plot2, ncol=2)
Faceting splits the data into subsets which are presented in different graphs for easier comparison.
# Facet by a discrete variable: one histogram per color, one density per cut.
plot1 <- qplot(carat, data = diamonds, facets = color ~ ., geom = "histogram",
               binwidth = 0.1, xlim = c(0, 3))
# `binwidth` only applies to binned geoms; the density geom ignores it, so it
# is not passed here (use `adjust` to control the smoothing bandwidth).
plot2 <- qplot(carat, data = diamonds, facets = cut ~ ., geom = "density",
               xlim = c(0, 3))
grid.arrange(plot1, plot2, ncol = 2)
plot1 <- qplot(displ, hwy, data = mpg, facets = . ~ drv)
plot2 <- qplot(hwy, data = mpg, facets = drv ~ ., binwidth = 2)
grid.arrange(plot1, plot2, ncol=2)
Here’s the use of some other parameters for qplot
:
qplot(carat, price, data=d,
xlim=c(0.5,1.5), ylim=c(0,5e3),
main="Main Title",
xlab=expression(beta[1] == 1), ylab="some stuff")
ggplot
and Layers
Layers are responsible for creating the objects that we perceive on the plot. A layer is composed of four parts:
data and aesthetic mapping,
a statistical transformation (stat),
a geometric object (geom)
a position adjustment.
qplot
does everything inside its parameters. If we like to have more control in the creation of the graphic, we need to use ggplot
.
ggplot
receives two arguments, the data which must be a data frame and which aesthetics mappings we want. An eg:
p <- ggplot(d, aes(carat, price, colour=cut))
p
It gave an error because we have not yet defined any geoms, so there is nothing to see. To add new layers we use the operator +
:
plot1 <- p + geom_point()
plot2 <- p + geom_line()
grid.arrange(plot1, plot2, ncol=2)
ggplot2
has a series of specialized functions that simplify the use of layer
with prefixes geom_XXX
or stat_XXX
p <- ggplot(diamonds, aes(x = carat))
# The `geom_params`/`stat_params` lists are not recognised (they are ignored
# with a warning), which left plot1 with default bins and no fill. Passing the
# parameters directly makes plot1 really match the histogram shortcut below.
plot1 <- p + stat_bin(geom = "bar", binwidth = 0.25, fill = "steelblue")
plot2 <- p + geom_histogram(binwidth = 0.25, fill = "steelblue") # same graph
grid.arrange(plot1, plot2, ncol = 2)
We can add multiple layers:
ggplot(msleep, aes(sleep_rem / sleep_total, awake)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
It’s possible to use the same graph object with other data using operator %+%
:
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
plot1 <- ggplot(mtcars, aes(mpg, wt, colour = cyl)) + geom_point()
mtcars2 <- transform(mtcars, wt = wt ^ 2)
plot2 <- plot1 %+% mtcars2
grid.arrange(plot1, plot2, ncol=2)
Function aes
describes which data values are assigned to things we see in the graph. The first two parameters are to define which should be considered as x
and y
. Other mappings can be stated, such as colour, as in the previous eg aes(mpg, wt, colour = cyl)
.
p <- ggplot(mtcars)
plot1 <- p + aes(wt, hp) + geom_point(colour="darkblue") # set colors
plot2 <- p + aes(disp, mpg) + geom_point() + aes(colour = factor(cyl)) # map colors
grid.arrange(plot1, plot2, ncol=2)
We can split the data into distinct groups, ie, datapoints that share something in common, and then plot them differently. This is done with the group
mapping.
plot1 <- ggplot(mtcars, aes(wt, hp, colour=factor(cyl), group=cyl)) +
geom_smooth(method="lm")
plot2 <- ggplot(mtcars, aes(wt, hp, colour=factor(cyl), group=cyl)) +
geom_boxplot() + theme(legend.position="none") # removes legend
grid.arrange(plot1, plot2, ncol=2)
Geoms determine the rendering of the graph. Each geom has a set of aesthetics it understands and a default statistic.
Some egs:
p <- ggplot(mtcars) + aes(wt, hp) + geom_point()
plot1 <- p +
geom_abline(intercept=2, slope=20, colour="red", size=1.5, linetype="dashed") +
geom_hline(yintercept=seq(100, 150, by=15)) +
geom_text(label=" A", angle=30)
plot2 <- p + geom_path(size = 0.5, lineend = "round")
plot3 <- p + geom_rect(xmin=2, xmax=3, ymin=200, ymax=300, fill="lightblue")
grid.arrange(plot1, plot2, plot3, ncol=3)
df <- data.frame(x = c(3, 1, 5), y = c(2, 4, 6), label = c("a","b","c"))
p <- ggplot(df, aes(x, y, label = label)) + xlab(NULL) + ylab(NULL)
plot1 <- p + geom_point() + ggtitle("geom_point")
plot2 <- p + geom_bar(stat="identity") + ggtitle("geom_bar(stat=\"identity\")")
plot3 <- p + geom_line() + ggtitle("geom_line")
plot4 <- p + geom_area() + ggtitle("geom_area")
plot5 <- p + geom_path() + ggtitle("geom_path")
plot6 <- p + geom_text() + ggtitle("geom_text")
plot7 <- p + geom_tile() + ggtitle("geom_tile")
plot8 <- p + geom_polygon() + ggtitle("geom_polygon")
grid.arrange(plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8, nrow=2, ncol=4)
Position adjustments modify the position of elements within a layer.
p <- ggplot(data = diamonds) + aes(x=clarity, fill=cut)
plot1 <- p + geom_bar(position="fill")
plot2 <- p + geom_bar(position="dodge")
plot3 <- p + geom_bar(position="stack")
grid.arrange(plot1, plot2, plot3, ncol=3)
It’s possible to connect geoms to other statistics. Values computed by statistics are surrounded by double dots, like ..count..
p <- ggplot(diamonds, aes(carat)) + xlim(0, 3)
plot1 <- p + stat_bin(aes(ymax = ..count..), binwidth = 0.1, geom = "area")
plot2 <- p + stat_bin(aes(size = ..density..), binwidth = 0.1, geom = "point", position="identity")
grid.arrange(plot1, plot2, ncol=2)
p <- ggplot(diamonds, aes(carat, price)) + xlim(1,3)
plot1 <- p + stat_bin2d(bins = 25)
plot2 <- p + stat_binhex(bins = 10)
grid.arrange(plot1, plot2, ncol=2)
# 2D density estimates of carat vs price drawn as contours, sized points and tiles.
p <- ggplot(diamonds, aes(carat, price)) + xlim(1, 3)
plot1 <- p + geom_point() + geom_density2d()
# contour = FALSE (spelled out: `F` is an ordinary, reassignable variable)
# returns the density grid instead of contour lines.
# The point-size limit must be given by name: unnamed values passed to
# scale_size_area() are not interpreted as a size range.
plot2 <- p +
  stat_density2d(geom = "point", aes(size = ..density..), contour = FALSE) +
  scale_size_area(max_size = 1.5)
plot3 <- p +
  stat_density2d(geom = "tile", aes(fill = ..density..), contour = FALSE)
grid.arrange(plot1, plot2, plot3, ncol = 3)
presdts <- presidential[-(1:3),] # remove the first 3 presidents
presdts$start <- as.Date(presdts$start)
head(presdts,4)
## # A tibble: 4 x 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Nixon 1969-01-20 1974-08-09 Republican
## 2 Ford 1974-08-09 1977-01-20 Republican
## 3 Carter 1977-01-20 1981-01-20 Democratic
## 4 Reagan 1981-01-20 1989-01-20 Republican
head(economics,4)
## # A tibble: 4 x 7
## date pce pop psavert uempmed unemploy year
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944 1967
## 2 1967-08-01 510. 198911 12.6 4.7 2945 1967
## 3 1967-09-01 516. 199113 11.9 4.6 2958 1967
## 4 1967-10-01 512. 199311 12.9 4.9 3143 1967
p <- qplot(date, unemploy, data=economics, geom="line",
xlab = "", ylab = "No. unemployed (1000s)")
p + geom_vline(aes(xintercept = as.numeric(start)), data = presdts) +
scale_x_date()
# Data ranges, used to span the shaded rectangles and to anchor annotations.
yrng <- range(economics$unemploy)
xrng <- range(economics$date)
library(scales) # use: alpha()
# Shade each presidential term by party. Use the filtered `presdts` (the same
# data as the vertical lines) rather than the full `presidential` table, whose
# first terms start before the economics series and would stretch the x axis.
# aes(NULL, NULL, ...) drops the inherited x/y mappings, which do not exist in
# the presidents data.
p + geom_rect(aes(NULL, NULL, xmin = start, xmax = end, fill = party),
              ymin = yrng[1], ymax = yrng[2], data = presdts) +
  scale_fill_manual(values = alpha(c("blue", "red"), 0.2))
highest <- subset(economics, unemploy == max(unemploy))
p + geom_point(data = highest, size = 3, colour = alpha("red", 0.5))
p + geom_text(aes(x, y, label = "Unemployment rates of the last 40 years"),
data = data.frame(x = xrng[2], y = yrng[2]),
hjust = 1, vjust = 1, size = 4)
Transformations carried out by the coordinate system change the appearance of the geoms: in polar coordinates a rectangle becomes a slice of a doughnut; in a map projection, the shortest path between two points will no longer be a straight line – Wickham
p <- ggplot(data=data.frame(x=c(1,200),y=c(1,100))) + aes(x,y)
plot1 <- p + geom_hline(yintercept=seq(20, 60, by=15)) +
geom_rect(xmin=20, xmax=40, ymin=20, ymax=60)
plot2 <- plot1 + coord_polar() # x position mapped to angle
plot3 <- plot1 + coord_polar(theta="y") # y position mapped to angle
plot4 <- plot1 + coord_flip() # flip coordinates
plot5 <- plot1 + coord_equal()
plot6 <- plot1 + coord_trans(x = "log10")
grid.arrange(plot1, plot2, plot3, plot4, plot5, plot6, nrow=2, ncol=3)
Plots within plots:
plot1 + annotation_custom(ggplotGrob(plot2), xmin = 100, xmax = 200,
ymin = 50, ymax = 100)
Eg with map coordinates:
library(maps)
library(mapproj)
m <- map_data("italy")
p <- ggplot(m, aes(x=long, y=lat, group=group)) +
geom_polygon(fill="white", colour="black")
p + coord_map() # With default mercator projection
Source: https://statisticaloddsandends.wordpress.com/2019/02/24/plots-within-plots-with-ggplot2-and-ggmap/
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
us_bbox <- c(left = -125, bottom = 25, right = -55, top = 50)
us_main_map <- get_stamenmap(us_bbox, zoom = 5, maptype = "terrain")
## Source : http://tile.stamen.com/terrain/5/4/10.png
## Source : http://tile.stamen.com/terrain/5/5/10.png
## Source : http://tile.stamen.com/terrain/5/6/10.png
## Source : http://tile.stamen.com/terrain/5/7/10.png
## Source : http://tile.stamen.com/terrain/5/8/10.png
## Source : http://tile.stamen.com/terrain/5/9/10.png
## Source : http://tile.stamen.com/terrain/5/10/10.png
## Source : http://tile.stamen.com/terrain/5/11/10.png
## Source : http://tile.stamen.com/terrain/5/4/11.png
## Source : http://tile.stamen.com/terrain/5/5/11.png
## Source : http://tile.stamen.com/terrain/5/6/11.png
## Source : http://tile.stamen.com/terrain/5/7/11.png
## Source : http://tile.stamen.com/terrain/5/8/11.png
## Source : http://tile.stamen.com/terrain/5/9/11.png
## Source : http://tile.stamen.com/terrain/5/10/11.png
## Source : http://tile.stamen.com/terrain/5/11/11.png
## Source : http://tile.stamen.com/terrain/5/4/12.png
## Source : http://tile.stamen.com/terrain/5/5/12.png
## Source : http://tile.stamen.com/terrain/5/6/12.png
## Source : http://tile.stamen.com/terrain/5/7/12.png
## Source : http://tile.stamen.com/terrain/5/8/12.png
## Source : http://tile.stamen.com/terrain/5/9/12.png
## Source : http://tile.stamen.com/terrain/5/10/12.png
## Source : http://tile.stamen.com/terrain/5/11/12.png
## Source : http://tile.stamen.com/terrain/5/4/13.png
## Source : http://tile.stamen.com/terrain/5/5/13.png
## Source : http://tile.stamen.com/terrain/5/6/13.png
## Source : http://tile.stamen.com/terrain/5/7/13.png
## Source : http://tile.stamen.com/terrain/5/8/13.png
## Source : http://tile.stamen.com/terrain/5/9/13.png
## Source : http://tile.stamen.com/terrain/5/10/13.png
## Source : http://tile.stamen.com/terrain/5/11/13.png
p_main <- ggmap(us_main_map)
alaska_bbox <- c(left = -180, bottom = 50, right = -128, top = 72)
alaska_map <- get_stamenmap(alaska_bbox, zoom = 5, maptype = "terrain")
## Source : http://tile.stamen.com/terrain/5/0/6.png
## Source : http://tile.stamen.com/terrain/5/1/6.png
## Source : http://tile.stamen.com/terrain/5/2/6.png
## Source : http://tile.stamen.com/terrain/5/3/6.png
## Source : http://tile.stamen.com/terrain/5/4/6.png
## Source : http://tile.stamen.com/terrain/5/0/7.png
## Source : http://tile.stamen.com/terrain/5/1/7.png
## Source : http://tile.stamen.com/terrain/5/2/7.png
## Source : http://tile.stamen.com/terrain/5/3/7.png
## Source : http://tile.stamen.com/terrain/5/4/7.png
## Source : http://tile.stamen.com/terrain/5/0/8.png
## Source : http://tile.stamen.com/terrain/5/1/8.png
## Source : http://tile.stamen.com/terrain/5/2/8.png
## Source : http://tile.stamen.com/terrain/5/3/8.png
## Source : http://tile.stamen.com/terrain/5/4/8.png
## Source : http://tile.stamen.com/terrain/5/0/9.png
## Source : http://tile.stamen.com/terrain/5/1/9.png
## Source : http://tile.stamen.com/terrain/5/2/9.png
## Source : http://tile.stamen.com/terrain/5/3/9.png
## Source : http://tile.stamen.com/terrain/5/4/9.png
## Source : http://tile.stamen.com/terrain/5/0/10.png
## Source : http://tile.stamen.com/terrain/5/1/10.png
## Source : http://tile.stamen.com/terrain/5/2/10.png
## Source : http://tile.stamen.com/terrain/5/3/10.png
p_alaska <- ggmap(alaska_map) +
#labs(subtitle = "Alaska") +
theme(axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
hawaii_bbox <- c(left = -160, bottom = 18.5, right = -154.5, top = 22.5)
hawaii_map <- get_stamenmap(hawaii_bbox, zoom = 6, maptype = "terrain")
## Source : http://tile.stamen.com/terrain/6/3/27.png
## Source : http://tile.stamen.com/terrain/6/4/27.png
## Source : http://tile.stamen.com/terrain/6/3/28.png
## Source : http://tile.stamen.com/terrain/6/4/28.png
p_hawaii <- ggmap(hawaii_map) +
#labs(subtitle = "Hawaii") +
theme(axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
library(grid)
p_main +
inset(ggplotGrob(p_alaska), xmin = -76.7, xmax = -66.7, ymin = 26, ymax = 35) +
inset(ggplotGrob(p_hawaii), xmin = -66.5, xmax = -55.5, ymin = 26, ymax = 35)
Let’s see an example with error bars (summarySE
is defined in the R markdown):
# from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
tgc <- summarySE(ToothGrowth, measurevar="len", groupvars=c("supp","dose"))
##
## Attaching package: 'plyr'
## The following object is masked from 'package:maps':
##
## ozone
head(tgc)
## supp dose N len sd se ci
## 1 OJ 0.5 10 13.23 4.459709 1.4102837 3.190283
## 2 OJ 1.0 10 22.70 3.910953 1.2367520 2.797727
## 3 OJ 2.0 10 26.06 2.655058 0.8396031 1.899314
## 4 VC 0.5 10 7.98 2.746634 0.8685620 1.964824
## 5 VC 1.0 10 16.77 2.515309 0.7954104 1.799343
## 6 VC 2.0 10 26.14 4.797731 1.5171757 3.432090
pd <- position_dodge(0.1) # move errorbars to the left and right
ggplot(tgc, aes(x=dose, y=len, colour=supp, group=supp)) +
geom_errorbar(aes(ymin=len-se, ymax=len+se), colour="black", width=.1, position=pd) +
geom_line(position=pd) +
geom_point(position=pd, size=3, shape=21, fill="white") + # 21 is filled circle
xlab("Dose (mg)") +
ylab("Tooth length") +
scale_colour_hue(name="Supplement type", # Legend label, use darker colors
breaks=c("OJ", "VC"),
labels=c("Orange juice", "Ascorbic acid"),
l=40) + # Use darker colors, lightness=40
ggtitle("The Effect of Vitamin C on\nTooth Growth in Guinea Pigs") +
expand_limits(y=0) + # Expand y range
scale_y_continuous(breaks=0:20*4) + # Set tick every 4
theme_bw() +
theme(legend.justification=c(1,0),
legend.position=c(1,0)) # Position legend in bottom right
This next eg is based on Robin Lovelace and James Cheshire’s tutorial (the Rpubs page is here and also here).
library(rgdal)
sport <- readOGR(dsn = "files", "london_sport") # open files/london_sport
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\jpn3t\Documents\GitHub\Markdowns\GraphicalTools\files", layer: "london_sport"
## with 33 features
## It has 4 fields
## Integer64 fields read as strings: Pop_2001
# sport.f <- fortify(sport, region = "ons_label") # format shapefile to plot
# head(sport.f)
# # add back attribute information associated with sport object (merge is a data join)
# sport.f <- merge(sport.f, sport@data, by.x = "id", by.y = "ons_label")
# head(sport.f)
sport <- SpatialPolygonsDataFrame(Sr = spTransform(sport,
CRSobj = CRS("+init=epsg:4326")),
data = sport@data)
sport.f <- fortify(sport) # format shapefile to be ploted
sport$id <- row.names(sport) # provide same column names for join
sport.f <- join(sport.f, sport@data)
head(sport.f)
## long lat order hole piece id group ons_label name
## 1 0.03163909 51.44288 1 FALSE 1 0 0.1 00AF Bromley
## 2 0.04152608 51.44046 2 FALSE 1 0 0.1 00AF Bromley
## 3 0.06333280 51.42321 3 FALSE 1 0 0.1 00AF Bromley
## 4 0.07694588 51.43151 4 FALSE 1 0 0.1 00AF Bromley
## 5 0.10922623 51.41360 5 FALSE 1 0 0.1 00AF Bromley
## 6 0.13119092 51.41437 6 FALSE 1 0 0.1 00AF Bromley
## Partic_Per Pop_2001
## 1 21.7 295535
## 2 21.7 295535
## 3 21.7 295535
## 4 21.7 295535
## 5 21.7 295535
## 6 21.7 295535
With a well-defined data frame, ggplot can plot it using polygons (each London borough is identified by group):
# Choropleth of sports participation per London borough.
# `sport` was reprojected to EPSG:4326 before fortifying, so the coordinates
# are longitude/latitude in degrees, not easting/northing in metres; the axis
# labels are corrected accordingly.
p <- ggplot(sport.f, aes(long, lat, group = group, fill = Partic_Per)) +
  geom_polygon() +
  coord_equal() +
  labs(x = "Longitude (deg)", y = "Latitude (deg)", fill = "% Sport Partic.") +
  ggtitle("London Sports Participation")
p + scale_fill_gradient(low = "white", high = "black") # black&white version
# ggsave("plot.png", scale = 3, dpi = 400) # to save image
Several operations over the London map:
p <- ggplot() +
geom_polygon(data = sport.f, aes(x = long, y = lat, group = group)) +
coord_map() # this line of code ensures the plot is to scale
plot1 <- p
plot2 <- p + geom_point(aes(x = coordinates(sport)[, 1], y = coordinates(sport)[,2]))
plot3 <- plot2 +
geom_path(data = sport.f, aes(x = long, y = lat, group = group), color = "white") +
theme_classic() # this line removes the distracting grey background
new_theme <- theme(axis.line = element_blank(), axis.ticks = element_blank(),
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.text.y = element_blank(), axis.text.x = element_blank(),
panel.background = element_rect(fill = "lightgreen"))
plot4 <- p + new_theme
grid.arrange(plot1, plot2, plot3, plot4, nrow=2, ncol=2)
These objects can also be layered in ggmap objects:
library(ggmap)
# Overlay the borough polygons on base maps fetched for the boroughs' bounding box.
b <- bbox(sport)
p <- ggmap(get_map(location = b))
plot1 <- p
plot2 <- p +
  geom_polygon(data = sport.f,
               aes(x = long, y = lat, group = group, fill = Partic_Per),
               alpha = 0.5) +
  scale_fill_continuous(low = "green", high = "red")
# using a new map source
# (crop = TRUE spelled out: `T` is an ordinary variable that can be reassigned)
p <- ggmap(get_map(location = b, source = "stamen", maptype = "toner",
                   crop = TRUE))
plot3 <- p +
  geom_polygon(data = sport.f,
               aes(x = long, y = lat, group = group, fill = Partic_Per),
               alpha = 0.5)
grid.arrange(plot1, plot2, plot3, nrow = 2, ncol = 2)
A view of Clifford’s strange attractors, with 1e6 points:
library(Rcpp)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Bare theme for the attractor plot: white panel, no legend, grid or axes.
opt <- theme(
  axis.text        = element_blank(),
  axis.title       = element_blank(),
  axis.ticks       = element_blank(),
  panel.grid       = element_blank(),
  panel.background = element_rect(fill = "white"),
  legend.position  = "none"
)
# Compile a C++ helper (via Rcpp) that iterates the map
#   x[i] = sin(a * y[i-1]) + c * cos(a * x[i-1])
#   y[i] = sin(b * x[i-1]) + d * cos(b * y[i-1])
# starting from (x0, y0) and returns the n points as a data frame with
# columns `x` and `y`.
# n = 0 now yields an empty data frame instead of writing past the end of the
# zero-length vectors; a negative n raises an error.
cppFunction('DataFrame createTrajectory(int n, double x0, double y0,
            double a, double b, double c, double d) {
  if (n < 0) stop("n must be non-negative");
  NumericVector x(n); // create the columns
  NumericVector y(n);
  if (n > 0) { // only seed the start point when there is a slot for it
    x[0] = x0;
    y[0] = y0;
  }
  for (int i = 1; i < n; ++i) {
    x[i] = sin(a * y[i-1]) + c * cos(a * x[i-1]);
    y[i] = sin(b * x[i-1]) + d * cos(b * y[i-1]);
  }
  // return a new data frame
  return DataFrame::create(_["x"] = x, _["y"] = y);
}
')
a <- -1.24458046630025
b <- -1.25191834103316
c <- -1.81590817030519
d <- -1.90866735205054
df <- createTrajectory(1000000, 0, 0, a, b, c, d)
#png("Clifford.png", units="px", width=1600, height=1600, res=300)
ggplot(df, aes(x, y)) + geom_point(color="black", shape=46, alpha=.01) + opt
Source: https://fronkonstin.com/2017/11/07/drawing-10-million-points-with-ggplot-clifford-attractors/