ggplot2
allows you to:
What you'll see here is just the beginning! See more at http://docs.ggplot2.org/current/
Syntax for ggplot2 visualizations will look like:
# Typical shape of a ggplot2 call: data and aesthetic mappings first, then
# layers, labels, and a theme chained together with `+`.
library(ggplot2)

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  labs(
    x = "Engine Displacement (liters)",
    y = "Highway Mileage",
    title = "Fuel Economy vs. Engine Displacement"
  ) +
  theme_bw()
Load these packages to follow along with this tutorial
# Packages used in this tutorial (attach order kept as in the original).
library(ggplot2)    # plotting
library(readxl)     # read_excel()
library(dplyr)      # %>%, group_by(), summarise(), tally()
library(gridExtra)  # NOTE(review): not referenced by the visible code - confirm it is needed
Import the following data sets from the data folder
# Read the example data sets; paths are relative to the working directory.
supermarket <- read_excel(
  path = "../data/Supermarket-Transactions.xlsx",
  sheet = "Data"
)
facebook <- read.delim(file = "../data/facebook.tsv")
reddit <- read.csv(file = "../data/reddit.csv")
race <- read.csv(file = "../data/race-comparison.csv")
# Plot object created from the data alone: no mappings, no layers.
ggplot(supermarket)

# Same data with x and y mapped; no geom layer has been added yet.
ggplot(supermarket, aes(`Purchase Date`, Revenue))
To display the data we need to tell ggplot what to draw
Function | Chart type |
---|---|
geom_histogram() | histogram |
geom_freqpoly() | frequency polygon |
geom_bar() | bar chart |
geom_point() | scatter plot |
geom_line() | line chart |
geom_boxplot() | boxplot |
Check out all the available geoms at docs.ggplot2.org/current
# Three ways to draw the distribution of one continuous variable (Revenue).

# Histogram
ggplot(supermarket, aes(Revenue)) +
  geom_histogram()

# Frequency polygon
ggplot(supermarket, aes(Revenue)) +
  geom_freqpoly()

# Density curve
ggplot(supermarket, aes(Revenue)) +
  geom_density()
# The same three distribution plots with appearance arguments set on the geom.

# Histogram: 100 bins, grey outline, white fill
ggplot(supermarket, aes(Revenue)) +
  geom_histogram(bins = 100, color = "grey40", fill = "white")

# Frequency polygon: 100 bins, blue line
ggplot(supermarket, aes(Revenue)) +
  geom_freqpoly(bins = 100, color = "blue")

# Density curve: semi-transparent red fill
ggplot(supermarket, aes(Revenue)) +
  geom_density(fill = "red", alpha = 0.5)
# Bar chart of transaction counts per product family, built two ways.

# 1. Let geom_bar() count the rows of the raw data.
ggplot(data = supermarket, aes(x = `Product Family`)) +
  geom_bar()

# 2. Pre-compute the counts and plot them as-is with stat = "identity".
#    The counts are stored in `family_counts` rather than `summary`, so the
#    data frame does not shadow the name of base::summary().
family_counts <- supermarket %>%
  count(`Product Family`)

ggplot(data = family_counts, aes(x = `Product Family`, y = n)) +
  geom_bar(stat = "identity")
# Bar appearance: fill colour, outline colour, and bar width.

# Default width
ggplot(supermarket, aes(`Product Family`)) +
  geom_bar(fill = "dodgerblue", color = "grey40")

# Narrower bars
ggplot(supermarket, aes(`Product Family`)) +
  geom_bar(fill = "dodgerblue", color = "grey40", width = 0.75)

# Bars that almost touch
ggplot(supermarket, aes(`Product Family`)) +
  geom_bar(fill = "dodgerblue", color = "grey40", width = 0.99)
# Scatter plots of revenue against purchase date.

# Default points
ggplot(data = supermarket, mapping = aes(x = `Purchase Date`, y = Revenue)) +
  geom_point()

# Small blue points drawn with point shape 5
ggplot(data = supermarket, mapping = aes(x = `Purchase Date`, y = Revenue)) +
  geom_point(color = "blue", size = 1, shape = 5)

# Semi-transparent points
ggplot(data = supermarket, mapping = aes(x = `Purchase Date`, y = Revenue)) +
  geom_point(color = "blue", alpha = 0.25)
# Revenue by units sold, with `Units Sold` treated as a discrete factor.

# Plain points
ggplot(data = supermarket, mapping = aes(x = factor(`Units Sold`), y = Revenue)) +
  geom_point()

# Jittered points
ggplot(data = supermarket, mapping = aes(x = factor(`Units Sold`), y = Revenue)) +
  geom_jitter(size = 1)

# Jittered points with transparency
ggplot(data = supermarket, mapping = aes(x = factor(`Units Sold`), y = Revenue)) +
  geom_jitter(size = 1, alpha = 0.1)
# Total revenue per purchase date (missing revenues ignored), as a line chart.
sales_by_date <- supermarket %>%
  group_by(`Purchase Date`) %>%
  summarise(Revenue = sum(Revenue, na.rm = TRUE))

ggplot(data = sales_by_date, mapping = aes(x = `Purchase Date`, y = Revenue)) +
  geom_line()
# Keep the daily-revenue line chart in `sales_plot` so different smoothers can
# be layered on top of the same base plot.
sales_plot <- ggplot(
  data = sales_by_date,
  mapping = aes(x = `Purchase Date`, y = Revenue)
) +
  geom_line()

# Default smoother with a small span
sales_plot +
  geom_smooth(span = 0.1)

# Larger span, standard-error band switched off
sales_plot +
  geom_smooth(span = 0.9, se = FALSE)

# Linear-model fit, standard-error band switched off
sales_plot +
  geom_smooth(method = "lm", se = FALSE)
# Revenue by number of children, with `Children` treated as a discrete factor.

# Default boxplot
ggplot(data = supermarket, mapping = aes(x = factor(Children), y = Revenue)) +
  geom_boxplot()

# Notched boxes with a semi-transparent blue fill
ggplot(data = supermarket, mapping = aes(x = factor(Children), y = Revenue)) +
  geom_boxplot(notch = TRUE, fill = "blue", alpha = 0.25)

# Outliers drawn in red with point shape 1
ggplot(data = supermarket, mapping = aes(x = factor(Children), y = Revenue)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 1)
Useful for smaller data sets like mpg
# Highway mileage by vehicle class in the `mpg` data set.

# Boxplot
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot()

# Boxplot with the jittered observations layered on top
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot() +
  geom_jitter(width = 0.2, alpha = 0.5)

# Violin plot
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_violin()
Bar charts can show values other than counts on the y-axis
# Bar height as a row count (the geom_bar() default) ...
ggplot(data = supermarket, mapping = aes(`Product Family`)) +
  geom_bar()

# ... versus bar height taken from a pre-computed value: total revenue per
# product family, plotted as-is with stat = "identity".
prod_revenue <- supermarket %>%
  group_by(`Product Family`) %>%
  summarise(Revenue = sum(Revenue, na.rm = TRUE))

ggplot(data = prod_revenue, mapping = aes(`Product Family`, Revenue)) +
  geom_bar(stat = "identity")
# Mapping a third variable to colour or fill.

# One frequency polygon per product family
ggplot(data = supermarket, mapping = aes(x = Revenue, color = `Product Family`)) +
  geom_freqpoly()

# Bars per product family, split by gender and placed side by side
ggplot(supermarket, aes(x = `Product Family`, fill = Gender)) +
  geom_bar(position = "dodge")

# Scatter plot coloured by country
ggplot(
  data = supermarket,
  mapping = aes(x = `Purchase Date`, y = Revenue, color = Country)
) +
  geom_point()
# Total revenue per purchase date and product family.
# summarise() removes only the last grouping variable, so without ungroup()
# the result would stay grouped by `Purchase Date` and carry that grouping
# into every later use of `prod_revenue`. ungroup() is used rather than a
# `.groups` argument because it behaves the same across dplyr versions.
prod_revenue <- supermarket %>%
  group_by(`Purchase Date`, `Product Family`) %>%
  summarise(Revenue = sum(Revenue, na.rm = TRUE)) %>%
  ungroup()

# Faint raw lines with a smoothed trend per product family on top.
# Three manual colours are supplied, one per product family.
# NOTE(review): assumes `Product Family` has exactly three levels - confirm.
ggplot(prod_revenue, aes(`Purchase Date`, Revenue, color = `Product Family`)) +
  geom_line(alpha = .2) +
  scale_color_manual(values = c("#4f81bd", "#8cc841", "#c0504d")) +
  geom_smooth(se = FALSE, span = .1)
# Small multiples: one panel per product family, each with the faint raw line
# and a smoothed trend.
ggplot(data = prod_revenue, mapping = aes(x = `Purchase Date`, y = Revenue)) +
  geom_line(alpha = 0.2) +
  geom_smooth(se = FALSE, span = 0.1) +
  facet_wrap(~ `Product Family`)
# facet_grid() layouts, drawn with geom_blank() so only the panel structure
# is visible.

# Product families across columns
ggplot(data = prod_revenue, mapping = aes(x = `Purchase Date`, y = Revenue)) +
  geom_blank() +
  facet_grid(. ~ `Product Family`)

# Product families down rows
ggplot(data = prod_revenue, mapping = aes(x = `Purchase Date`, y = Revenue)) +
  geom_blank() +
  facet_grid(`Product Family` ~ .)
# Base histogram reused by the scale, limit, and coord examples.
p <- ggplot(data = supermarket, mapping = aes(x = Revenue)) +
  geom_histogram(bins = 100, color = "grey40", fill = "white")

# Continuous x scale with a custom title, limits, breaks every 10, and
# dollar-formatted labels
p +
  scale_x_continuous(
    name = "Revenue from Individual Transactions",
    limits = c(10, 50),
    breaks = seq(10, 50, by = 10),
    labels = scales::dollar
  )

# Log10 x scale with dollar-formatted labels
p +
  scale_x_log10(labels = scales::dollar)
Shorthand options for controlling the axis limits: xlim, ylim, lims
# Axis-limit shorthands applied to the base histogram `p`.

# x-axis limits only
p +
  xlim(25, 55)

# y-axis limits only
p +
  ylim(0, 400)

# Both axes in a single call
p +
  lims(x = c(0, 100), y = c(0, 1000))
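An alternative is to use coord_cartesian, which zooms in on the plot without dropping the observations that fall outside the limits (xlim, ylim and scale limits remove them before the statistics are computed); you can also rotate plots with coord_flip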
# Coordinate-system adjustments applied to the base histogram `p`.

# Restrict the visible x and y ranges
p +
  coord_cartesian(xlim = c(10, 50), ylim = c(0, 400))

# Swap the x and y axes
p +
  coord_flip()
Shorthand options for labelling axes and legends: labs, xlab, ylab, ggtitle
# labs() sets the axis titles, the legend title (via the `color` aesthetic),
# and the main title in a single call.
ggplot(
  data = prod_revenue,
  mapping = aes(x = `Purchase Date`, y = Revenue, color = `Product Family`)
) +
  geom_line(alpha = 0.2) +
  geom_smooth(se = FALSE, span = 0.1) +
  labs(
    x = "x-axis title",
    y = "y-axis title",
    color = "legend title",
    title = "Main title"
  )
Legend features can be controlled with guides, and legend positioning is controlled within theme
# Legend control: the points are drawn at alpha = 0.2, while the legend keys
# are overridden to alpha = 1 and listed in reverse order; the legend is
# placed at the bottom of the plot.
ggplot(
  data = supermarket,
  mapping = aes(x = `Purchase Date`, y = Revenue, color = Country)
) +
  geom_point(alpha = 0.2) +
  guides(
    color = guide_legend(
      override.aes = list(alpha = 1),
      reverse = TRUE
    )
  ) +
  theme(legend.position = "bottom")
# Histogram of per-transaction revenue with a dollar-formatted x-axis
# (limits 0 to 60, breaks every 10) and a main title.
ggplot(data = supermarket, mapping = aes(x = Revenue)) +
  geom_histogram(bins = 100, fill = "antiquewhite", color = "grey40") +
  scale_x_continuous(
    limits = c(0, 60),
    breaks = seq(0, 60, by = 10),
    labels = scales::dollar
  ) +
  labs(title = "Gross Revenue per Transaction")
# Dot plot of revenue by city, coloured by gender.
#
# `city_rev` is not created anywhere in the visible tutorial code, so it is
# built here when it does not already exist; an existing `city_rev` is left
# untouched.
# NOTE(review): the aggregation below is inferred from the plot's aesthetics
# (Revenue, City, Gender) and its title. It assumes `supermarket` has a `City`
# column - confirm against the intended summary.
if (!exists("city_rev")) {
  city_rev <- supermarket %>%
    group_by(City, Gender) %>%
    summarise(Revenue = sum(Revenue, na.rm = TRUE)) %>%
    ungroup()
}

# Axis titles are suppressed (x = NULL, y = NULL); the x-axis is
# dollar-formatted from 0 to 10,000 with breaks every 2,000.
ggplot(city_rev, aes(Revenue, City, color = Gender)) +
  geom_point() +
  scale_x_continuous(
    labels = scales::dollar,
    limits = c(0, 10000),
    breaks = seq(0, 10000, by = 2000)
  ) +
  labs(x = NULL, y = NULL, title = "Total Revenue by Gender and Location") +
  theme_minimal()
Several theme options are available (even more in the ggthemes package)
# Rebuild `p` as the antique-white revenue histogram, then preview it under
# several built-in themes.
p <- ggplot(data = supermarket, mapping = aes(x = Revenue)) +
  geom_histogram(bins = 100, fill = "antiquewhite", color = "grey40")

p +
  theme_classic()

p +
  theme_minimal()

p +
  theme_dark()
# Display the plot stored in `basic`.
# NOTE(review): `basic` is not defined in the visible part of this tutorial;
# presumably it is a previously built plot that includes geom_smooth(), given
# the console message recorded below - confirm and add its definition.
basic
## `geom_smooth()` using method = 'loess'
# Start from theme_minimal(), then override individual theme elements.
# The font family "Georgia" is requested by name.
# NOTE(review): presumably it must be installed on the rendering machine - confirm.
basic +
  theme_minimal() +
  theme(
    # Text
    text = element_text(family = "Georgia"),
    plot.title = element_text(face = "bold", size = 16),
    # Legend
    legend.position = "top",
    # Axis ticks
    axis.ticks = element_line(color = "grey70", size = 0.2),
    # Grid lines: dashed horizontal major lines only
    panel.grid.major.y = element_line(linetype = "dashed", color = "darkgray"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )
Function | Details |
---|---|
ggplot() | Call the data and map to x and y coordinates |
geom_xx() | Map data to geometric shape to plot on graph |
facet_xx() | Create small multiples by facetting |
scale... | Adjust x and y scale parameters |
xlim , ylim , lims | Shorthand for controlling axis limits |
coord_cartesian() | Alternative to control axis limits |
coord_flip() | Rotate x and y axes |
labs , xlab , ylab | Shorthand for creating titles |
guides() | Control legend features |
theme() | Control theme features |