To plot a line graph in ggplot2, you need:
- a ggplot() object
- a geom_line() object with a defined aesthetic mapping (aes())
Here’s an example:
library(ggplot2)
# Explicit (x, y) coordinates of the curve to draw
df <- data.frame(
  x = c(0, 1, 2, 3, 4, 5, 6),
  y = c(0, 1, 4, 9, 16, 25, 36)
)
# Start from an empty plot and add one line layer that reads from df
p <- ggplot() +
  geom_line(data = df, mapping = aes(x = x, y = y))
print(p)
In the above example, the y-values were simply the x-values squared, so we could have achieved the same thing with an equation as opposed to explicit values:
# Only the x-values are stored; the y-values are computed inside aes()
df <- data.frame(x = c(0, 1, 2, 3, 4, 5, 6))
p <- ggplot() +
  geom_line(data = df, mapping = aes(x = x, y = x^2))
print(p)
If you want to plot additional lines on the same set of axes, add more geom_line() objects:
# Shared x-values for both curves
df <- data.frame(x = c(0, 1, 2, 3, 4, 5, 6))
# Each geom_line() layer draws one curve on the same axes
p <- ggplot() +
  geom_line(data = df, mapping = aes(x = x, y = x)) +   # straight line
  geom_line(data = df, mapping = aes(x = x, y = x^2))   # parabola
print(p)
To change the title and labels, create ggtitle(), ylab() and xlab() objects:
# Shared x-values for both curves
df <- data.frame(x = c(0, 1, 2, 3, 4, 5, 6))
# Two line layers followed by the title and the axis labels
p <- ggplot() +
  geom_line(data = df, mapping = aes(x = x, y = x)) +
  geom_line(data = df, mapping = aes(x = x, y = x^2)) +
  ggtitle("y = x and y = x^2") +
  ylab("y-values") +
  xlab("x-values")
print(p)
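Of course, having “x^2” instead of “x²” in the title doesn’t look great. To improve this text formatting we have three options, each with their own pros and cons:
- copy-paste the Unicode symbols directly into the code
- use the Unicode escape codes for the symbols
- use LaTeX-style formatting via the latex2exp package
Here are examples of each option:
This is the simplest option: find the symbols you want (e.g. by Googling them) and copy-paste them into your code: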
# Plot y = x and y = x^2 on 0..2, labelling the figure with symbols pasted
# directly into the strings.
df <- data.frame(x = seq(0, 2, 0.01))
p <- ggplot() +
  geom_line(data = df, aes(x = x, y = x)) +
  geom_line(data = df, aes(x = x, y = x^2)) +
  # The data runs from 0 up to 2, so the range is written with "≤"
  # (the "≥" used previously stated the inequality backwards)
  ggtitle("β = α and β = α² for 0 ≤ α ≤ 2") +
  ylab("Output, β (mΩ)") +
  xlab("Input, α (μs)")
print(p)
A slightly more complicated option is to look up the Unicode code points for the symbols and use those, prefixed with \U inside the string, to let R know you want them interpreted as Unicode characters:
# Plot y = x and y = x^2 on 0..2, labelling the figure with \U escapes:
# \U03B1 alpha, \U03B2 beta, \U00B2 superscript two, \U2264 less-than-or-equal,
# \U03A9 capital omega, \U03BC mu.
df <- data.frame(x = seq(0, 2, 0.01))
p <- ggplot() +
  geom_line(data = df, aes(x = x, y = x)) +
  geom_line(data = df, aes(x = x, y = x^2)) +
  # The data runs from 0 up to 2, so the range uses \U2264 (<=); the \U2265
  # (>=) used previously stated the inequality backwards
  ggtitle(
    "\U03B2 = \U03B1 and \U03B2 = \U03B1\U00B2 for 0 \U2264 \U03B1 \U2264 2"
  ) +
  ylab("Output, \U03B2 (m\U03A9)") +
  xlab("Input, \U03B1 (\U03BCs)")
print(p)
By using the latex2exp package, you can use the full power of LaTeX to generate proper-looking labels. Note, however, that backslashes have a special meaning in R strings, so you need to escape each one with a second backslash (an escaping backslash) in order to allow the first backslash to be interpreted as such by LaTeX:
library(latex2exp)
# Plot y = x and y = x^2 on 0..2, labelling the figure with LaTeX markup that
# TeX() converts for use in the plot. Each LaTeX backslash is doubled so that
# R passes a single backslash through to TeX().
df <- data.frame(x = seq(0, 2, 0.01))
p <- ggplot() +
  geom_line(data = df, aes(x = x, y = x)) +
  geom_line(data = df, aes(x = x, y = x^2)) +
  # The data runs from 0 up to 2, so the range uses \leq; the \geq used
  # previously stated the inequality backwards
  ggtitle(
    TeX("\\beta = \\alpha and \\beta = \\alpha$^2$ for 0\\leq\\alpha\\leq{}2")
  ) +
  ylab(TeX("Output, \\beta (m\\Omega)")) +
  xlab(TeX("Input, \\alpha (\\mu{}s)"))
print(p)
Colours and legends are added automatically by ggplot2:
# Dog age (in dog years) for human ages 1 to 16, stored in long format:
# one block of 16 rows per dog size
df <- data.frame(
  human_age = rep(1:16, times = 3),
  dog_age = c(
    # Large
    15, 24, 28, 32, 36, 45, 50, 55, 61, 66, 72, 77, 82, 88, 93, 120,
    # Medium
    15, 24, 28, 32, 36, 42, 47, 51, 56, 60, 65, 69, 74, 78, 83, 87,
    # Small
    15, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80
  ),
  dog_size = rep(c("Large", "Medium", "Small"), each = 16)
)
# Mapping colour to dog_size yields one coloured line per size plus a legend
p <- ggplot(
  df,
  aes(x = human_age, y = dog_age, group = dog_size, colour = dog_size)
) +
  geom_line() +
  ggtitle("How Old is a Dog in Dog Years?") +
  ylab("Age in Dog Years") +
  xlab("Age in Human Years") +
  labs(colour = "Dog Size")
print(p)
FYI, the above plot comes from this page.
If you don’t like the colours ggplot2 chooses for you, you can change them with the scale_color_manual() function:
# Same dog-years plot, with the line colours chosen by hand
p <- ggplot(
  df,
  aes(x = human_age, y = dog_age, group = dog_size, colour = dog_size)
) +
  geom_line() +
  ggtitle("How Old is a Dog in Dog Years?") +
  ylab("Age in Dog Years") +
  xlab("Age in Human Years") +
  labs(colour = "Dog Size") +
  # One colour per dog_size group
  scale_color_manual(values = c("blue", "red", "orange"))
print(p)
The thicknesses of the lines can be changed with the scale_size_manual() function:
# Same dog-years plot, with both colour and line thickness mapped to dog_size
# NOTE(review): recent ggplot2 releases appear to prefer the `linewidth`
# aesthetic over `size` for lines - confirm against the installed version
p <- ggplot(
  df,
  aes(
    x = human_age, y = dog_age,
    group = dog_size, colour = dog_size, size = dog_size
  )
) +
  geom_line() +
  ggtitle("How Old is a Dog in Dog Years?") +
  ylab("Age in Dog Years") +
  xlab("Age in Human Years") +
  # Giving both legends the same title
  labs(colour = "Dog Size", size = "Dog Size") +
  scale_color_manual(values = c("blue", "red", "orange")) +
  # One thickness per dog_size group
  scale_size_manual(values = c(3, 2, 1))
print(p)
The style of the lines can be changed with the scale_linetype_manual() function:
# Same dog-years plot, with colour, thickness and line style all mapped to
# dog_size
p <- ggplot(
  df,
  aes(
    x = human_age, y = dog_age,
    group = dog_size, colour = dog_size,
    size = dog_size, linetype = dog_size
  )
) +
  geom_line() +
  ggtitle("How Old is a Dog in Dog Years?") +
  ylab("Age in Dog Years") +
  xlab("Age in Human Years") +
  labs(colour = "Dog Size", size = "Dog Size", linetype = "Dog Size") +
  scale_color_manual(values = c("blue", "red", "orange")) +
  # Every group gets the same thickness here; only the dash pattern varies
  scale_size_manual(values = c(1, 1, 1)) +
  scale_linetype_manual(values = c("twodash", "longdash", "dotted"))
print(p)
# Same styled dog-years plot, now with the panel gridlines removed and fixed
# axis limits
p <- ggplot(
  df,
  aes(
    x = human_age, y = dog_age,
    group = dog_size, colour = dog_size,
    size = dog_size, linetype = dog_size
  )
) +
  geom_line() +
  ggtitle("How Old is a Dog in Dog Years?") +
  ylab("Age in Dog Years") +
  xlab("Age in Human Years") +
  labs(colour = "Dog Size", size = "Dog Size", linetype = "Dog Size") +
  scale_color_manual(values = c("blue", "red", "orange")) +
  scale_size_manual(values = c(1, 1, 1)) +
  scale_linetype_manual(values = c("twodash", "longdash", "dotted")) +
  # Blank out both the major and the minor gridlines
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # Axis ranges: 0 to 120 dog years against 0 to 16 human years
  scale_y_continuous(limits = c(0, 120)) +
  scale_x_continuous(limits = c(0, 16))
print(p)
For this example, the raw data will be imported from a CSV file:
# Read the raw data from the CSV hosted on GitHub; check.names = FALSE stops
# read.csv from rewriting the column headers
df <- read.csv(
  file = "https://github.com/rowannicholls/rowannicholls.github.io/blob/master/R/graphs/ggplot2/Points.csv?raw=true",
  check.names = FALSE
)
This CSV contains the number of log points for each team in the English Premier League for each match week of the 2018-19 season:
# One line per team: points against match week, coloured by team
p <- ggplot(
  df,
  aes(x = matchweek, y = points, group = team, colour = team)
) +
  geom_line() +
  ggtitle("Premier League 2018-19 Log Points") +
  ylab("Points") +
  xlab("Match Week") +
  # Empty string: the legend is shown without a title
  labs(colour = "") +
  # Twenty hand-picked colours, one per team
  scale_color_manual(
    values = c(
      "#ff0000", "#8b0304", "#005daa", "#80bfff", "#224781",
      "#0000dd", "#0a4af5", "#274488", "#000000", "#176fc0",
      "#0101e8", "#dd0000", "#6caddf", "#e80909", "#000000",
      "#ed1a3b", "#132257", "#fbee23", "#7f0000", "#fdbc02"
    )
  )
print(p)
For this example, use the pre-installed ‘ToothGrowth’ data set. This has data from a study on the effect of vitamin C on tooth growth in guinea pigs. Here’s what the head of the data frame looks like:
# Show the first rows of the ToothGrowth data frame (echoed output below)
head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
…and here is the mean tooth length for each dose, plotted with error bars that span each group’s minimum and maximum:
# Summarise tooth length per dose in ToothGrowth, then plot the group means
# with bars running from each group's minimum to its maximum, annotated with
# the mean and the sample size.
df <- ToothGrowth
# Doses in order of first appearance; every summary vector follows this order
doses <- unique(df$dose)
# Tooth lengths split into one vector per dose
len_by_dose <- lapply(doses, function(d) df$len[which(df$dose == d)])
# Per-group summaries, computed with vapply instead of growing vectors with
# c() inside a loop (this also avoids a local variable that masks base::sub)
# Maximum of each group
upper <- vapply(len_by_dose, max, numeric(1))
# Minimum of each group
lower <- vapply(len_by_dose, min, numeric(1))
# Average of each group
average <- vapply(len_by_dose, mean, numeric(1))
# Sample size of each group
n <- vapply(len_by_dose, length, integer(1))
# Overall extent of the bars, used to offset the text annotations
ytop <- max(upper)
ybottom <- min(lower)
yrange <- ytop - ybottom
df <- data.frame(doses, average, upper, lower)
# Plot
p <- ggplot(df, aes(x = doses, y = average)) +
  geom_point(size = 4) +
  geom_line() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2) +
  # Mean label, placed a tenth of the overall range below each point
  annotate(
    "text", x = doses, y = average - 0.1 * yrange,
    label = paste("mean =", average)
  ) +
  # Sample-size label, placed a tenth of the overall range below the lowest bar
  annotate(
    "text", x = doses, y = ybottom - 0.1 * yrange, label = paste("n =", n)
  ) +
  # Add more space at the bottom in order to fit the annotations
  ylim(ybottom - 0.1 * yrange, ytop)
print(p)
A similar graph can be plotted with help from the ggpubr package. This approach can be useful for certain plots, but it can make it more difficult to control the plot’s look.
library(ggpubr)
# ggpubr equivalent: one line per supplement type, with "mean_ci" added at
# each dose and colours taken from the "jco" palette
ggline(
  ToothGrowth,
  x = "dose",
  y = "len",
  add = "mean_ci",
  color = "supp",
  palette = "jco"
)
Finally, save your plot to your computer as an image using one of the following (depending on what format you want the image to be in):
# These three calls are alternatives - use one, not all of them.
# Option 1: PNG output; must be called before the plotting code
png("File Name.png")
# Option 2: PDF output; must be called before the plotting code
pdf("File Name.pdf")
# Option 3: ggsave(); must be called after the graph has been plotted
ggsave("File Name.png")
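If you use one of the first two, it must come before you start plotting the graph (i.e. before you call ggplot()), and you need to call dev.off() once the plot has been printed so that the file is written and closed. If you use the last one (ggsave()), it must come after you’ve plotted the graph.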