You will need the ggplot2 package installed.
# Install ggplot2 only when it is not already available, so that re-running
# this script does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
Load the package:
library("ggplot2")
Load the data:
# Read the tab-delimited article metrics table into a data frame.
counts_raw <- read.delim(file = "data/counts-raw.txt.gz")
Select only the research articles:
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose articleType is exactly "Research Article".
research <- filter(counts_raw, articleType == "Research Article")
Let’s make our first plot:
# Base plot object: pdfDownloadsCount on the x-axis, wosCountThru2011 on the
# y-axis. No layers yet, so printing p alone would show empty axes.
p <- ggplot(research, aes(pdfDownloadsCount, wosCountThru2011))
# Adding the point layer at top level draws the scatter plot; p itself is
# left unchanged.
p + geom_point()
# Scatter plot of wosCountThru2011 against pdfDownloadsCount, this time with
# the point layer stored in p.
p <- ggplot(research, aes(pdfDownloadsCount, wosCountThru2011)) +
  geom_point()
p
# Colour is mapped only inside the point layer, so the points are coloured by
# journal while geom_smooth() fits a single curve through all of the data.
p <- ggplot(data = research,
            mapping = aes(x = pdfDownloadsCount, y = wosCountThru2011)) +
  geom_point(mapping = aes(colour = journal)) +
  geom_smooth()
p
# Colour is mapped in ggplot(), so every layer inherits it: the points are
# coloured by journal and geom_smooth() draws one curve per journal.
p <- ggplot(data = research,
            mapping = aes(x = pdfDownloadsCount,
                          y = wosCountThru2011,
                          colour = journal)) +
  # shape = 1 draws open circles; alpha = 0.25 makes them mostly transparent.
  geom_point(alpha = 0.25, shape = 1) +
  geom_smooth()
p
Create a scatter plot with daysSincePublished mapped to the x-axis and wosCountThru2011 mapped to the y-axis. Include a loess fit of the data. Set the transparency level (alpha) of the points to 0.5 and color the points according to the journal where the article was published. Make the loess curve red.
# Exercise solution: wosCountThru2011 against daysSincePublished.
# The journal colour is mapped inside geom_point() only, so the points are
# coloured by journal while geom_smooth() fits a single curve to all articles.
# That curve is drawn in red, as the exercise asks.
# NOTE: geom_smooth() picks its smoothing method automatically; pass
# method = "loess" to force a loess fit (can be slow and memory-hungry on
# large data sets).
p <- ggplot(research, aes(x = daysSincePublished,
                          y = wosCountThru2011)) +
  geom_point(aes(color = journal), alpha = 0.5) +
  geom_smooth(color = "red")
p
Change the scales of the plot:
# Square-root transform both axes and assign the journal colours by hand.
# The colours are matched to the journal levels in order, one per journal.
p <- ggplot(data = research,
            mapping = aes(x = pdfDownloadsCount, y = wosCountThru2011)) +
  geom_point(mapping = aes(colour = journal)) +
  geom_smooth() +
  scale_x_sqrt() +
  scale_y_sqrt() +
  # Greyscale alternative, kept for reference:
  # scale_color_grey() +
  scale_colour_manual(
    values = c("red", "blue", "green", "black", "grey", "yellow", "purple")
  )
p
Better colors:
# RColorBrewer provides the named palettes used by scale_color_brewer().
library("RColorBrewer")
# Show the available palettes, restricted to the qualitative ones.
display.brewer.all(type = "qual")
# Same square-root-scaled scatter plot, with semi-transparent points and the
# journal colours taken from the ColorBrewer "Dark2" palette.
p <- ggplot(data = research,
            mapping = aes(x = pdfDownloadsCount, y = wosCountThru2011)) +
  geom_point(mapping = aes(colour = journal), alpha = 0.5) +
  geom_smooth() +
  scale_x_sqrt() +
  scale_y_sqrt() +
  scale_colour_brewer(palette = "Dark2")
p
Split the plot by journal:
# Facet the plot into a grid: one row per year, one column per journal.
# Each panel gets its own points and its own smooth curve.
p <- ggplot(data = research,
            mapping = aes(x = pdfDownloadsCount, y = wosCountThru2011)) +
  geom_point(mapping = aes(colour = journal), alpha = 0.5) +
  geom_smooth() +
  scale_x_sqrt() +
  scale_y_sqrt() +
  scale_colour_brewer(palette = "Dark2") +
  # Single-variable alternative, kept for reference:
  # facet_wrap(~journal)
  facet_grid(year ~ journal)
p
What does the citation relationship look like for immunology papers?
# Add a logical column: TRUE when plosSubjectTags contains "Immunology".
research <- mutate(research,
                   immuno = grepl("Immunology", plosSubjectTags))
# One panel per value of immuno.
p <- ggplot(data = research,
            mapping = aes(x = pdfDownloadsCount, y = wosCountThru2011)) +
  geom_point() +
  facet_wrap(~ immuno)
p
Add another variable to research called evolution, which is a logical vector indicating if the article has the PLOS subject tag “Evolutionary Biology”. Use facet_grid to create subplots based on the variables evolution and immuno.
# Add both logical subject-tag flags in a single mutate() call. immuno is
# recomputed here so that this solution does not rely on earlier steps.
research <- mutate(
  research,
  evolution = grepl("Evolutionary Biology", plosSubjectTags),
  immuno = grepl("Immunology", plosSubjectTags)
)
# Rows split by evolution, columns by immuno; label_both prints the variable
# name together with its value in each facet strip.
p <- ggplot(data = research,
            mapping = aes(x = pdfDownloadsCount, y = wosCountThru2011)) +
  geom_point() +
  facet_grid(evolution ~ immuno, labeller = "label_both")
p
Combine dplyr and ggplot2:
# Per-journal summary of backtweetsCount:
#   num  - number of articles in the journal
#   mean - mean of backtweetsCount
#   sem  - standard error of the mean, sd / sqrt(num)
# num must be defined before sem, because sem refers to it.
tweets_per_journal <- summarize(
  group_by(research, journal),
  num = n(),
  mean = mean(backtweetsCount),
  sem = sd(backtweetsCount) / sqrt(num)
)
tweets_per_journal
## Source: local data frame [7 x 4]
##
## journal num mean sem
## <fctr> <int> <dbl> <dbl>
## 1 pbio 1325 0.05811321 0.020153395
## 2 pcbi 1351 0.12657291 0.052177184
## 3 pgen 1619 0.06547251 0.020408525
## 4 pmed 643 0.31104199 0.187868371
## 5 pntd 621 0.02576490 0.009057697
## 6 pone 14078 0.49303878 0.034484187
## 7 ppat 1459 0.02604524 0.008807428
Make a bar plot with ggplot2:
# Bar chart of the per-journal means. stat = "identity" makes the bar heights
# come straight from the mean column instead of counting rows.
tweets_bar <- ggplot(data = tweets_per_journal,
                     mapping = aes(x = journal, y = mean)) +
  geom_bar(stat = "identity") +
  # Error bars span one standard error on either side of the mean.
  geom_errorbar(mapping = aes(ymax = mean + sem, ymin = mean - sem),
                width = 0.1)
tweets_bar
Save ggplot2 plots:
# Open the help page for ggsave(), the ggplot2 function for saving plots.
help("ggsave")