Questions
Use your own data. Produce the following types of plots and comment on each plot. Plots should be meaningful. If you use the data we used in class, make sure the plots are not the same as the ones in the slides. All plots should have a title, a caption, and appropriate labels on the x- and y-axes.
library(tidyverse)
library(gganimate)
library(knitr)
library(gifski)
# Load the WHO COVID-19 global data set and prepare its date columns.
library(lubridate)

df <- read_csv("https://covid19.who.int/WHO-COVID-19-global-data.csv")

# The country code is not used below; drop it and keep the country name.
df[["Country_code"]] <- NULL

# Give the reporting date a shorter name, make sure it is a Date, and derive
# the month number (1-12) from it.
df <- df %>%
  rename(date = Date_reported) %>%
  mutate(
    date = ymd(date),
    month = month(date)
  )
# Bar-chart race: the ten countries with the highest mean of New_deaths in
# each month.

# Mean of New_deaths per country and month. `month` is a bare month number,
# so rows from different years that share a month number are pooled together.
# na.rm = TRUE keeps a single missing report from turning a country's whole
# monthly mean into NA (which would silently drop it from the ranking).
df1 <- df %>%
  group_by(Country, month) %>%
  summarise(mean = mean(New_deaths, na.rm = TRUE), .groups = "drop")

# Rank countries within each month (1 = highest mean) and keep the top 10.
# ties.method = "first" gives every country a distinct integer rank; the
# default (average) assigns tied countries the same fractional rank, which
# draws their bars on top of each other.
df2 <- df1 %>%
  group_by(month) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  filter(rank <= 10) %>%
  ungroup()

# One horizontal bar per country, positioned by rank (rank 1 at the top after
# scale_x_reverse()), animated across months.
animation <- df2 %>%
  ggplot(aes(
    x = rank, y = mean,
    group = Country, fill = Country, label = Country
  )) +
  geom_col() +
  geom_text(aes(y = mean, label = Country), hjust = 1.4) +
  # clip = "off" lets country labels extend beyond the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Month {closest_state}",
    caption = paste(
      "Mean of reported new deaths per country and month, top 10 countries.",
      "Source: WHO COVID-19 global data."
    ),
    x = "",
    y = "Number of Deaths",
    fill = "Country"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    # The rank axis carries no information of its own; hide its ticks/text.
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(month) +
  ease_aes("cubic-in-out")

animate(animation, nframes = 400, fps = 10)
# The US dominated the average number of deaths per month pretty early on, but as of right now, India has the highest average number of deaths.
# Bar-chart race: the five WHO regions with the highest mean of New_cases in
# each month.

# Mean of New_cases per WHO region and month. `month` is a bare month number,
# so rows from different years that share a month number are pooled together.
# na.rm = TRUE keeps a single missing report from turning a region's whole
# monthly mean into NA (which would silently drop it from the ranking).
df3 <- df %>%
  group_by(WHO_region, month) %>%
  summarise(mean = mean(New_cases, na.rm = TRUE), .groups = "drop")

# Rank regions within each month (1 = highest mean) and keep the top 5.
# ties.method = "first" gives every region a distinct integer rank; the
# default (average) assigns tied regions the same fractional rank, which
# draws their bars on top of each other.
df4 <- df3 %>%
  group_by(month) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  filter(rank <= 5) %>%
  ungroup()

# One horizontal bar per region, positioned by rank (rank 1 at the top after
# scale_x_reverse()), animated across months.
animation1 <- df4 %>%
  ggplot(aes(
    x = rank, y = mean,
    group = WHO_region, fill = WHO_region, label = WHO_region
  )) +
  geom_col() +
  geom_text(aes(y = mean, label = WHO_region), hjust = 1.4) +
  # clip = "off" lets region labels extend beyond the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Month {closest_state}",
    caption = paste(
      "Mean of reported new cases per WHO region and month, top 5 regions.",
      "Source: WHO COVID-19 global data."
    ),
    x = "",
    y = "Number of Cases",
    fill = "Region"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    # The rank axis carries no information of its own; hide its ticks/text.
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(month) +
  ease_aes("cubic-in-out")

animate(animation1, nframes = 400, fps = 10)
# This bar race animation looks at the average number of cases per month by WHO region. SEARO (the South-East Asia region) had the highest average number of cases by the end of September.
# Bar-chart race: the seven countries with the highest mean SES score in each
# year from 1990 onward.
df5 <- read_csv("GLOB.SES.csv")

# Mean SES per country and year, restricted to 1990 and later.
# na.rm = TRUE keeps a missing score from turning a country's mean for that
# year into NA (which would silently drop it from the ranking).
df6 <- df5 %>%
  filter(year >= 1990) %>%
  group_by(country, year) %>%
  summarise(mean = mean(SES, na.rm = TRUE), .groups = "drop")

# Rank countries within each year (1 = highest mean) and keep the top 7.
# ties.method = "first" gives every country a distinct integer rank; the
# default (average) assigns tied countries the same fractional rank, which
# draws their bars on top of each other.
df7 <- df6 %>%
  group_by(year) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  filter(rank <= 7) %>%
  ungroup()

# One horizontal bar per country, positioned by rank (rank 1 at the top after
# scale_x_reverse()), animated across years.
animation2 <- df7 %>%
  ggplot(aes(
    x = rank, y = mean,
    group = country, fill = country, label = country
  )) +
  geom_col() +
  geom_text(aes(y = mean, label = country), hjust = 1.4) +
  # clip = "off" lets country labels extend beyond the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Year: {closest_state}",
    caption = paste(
      "Mean SES score per country and year from 1990 onward,",
      "top 7 countries. Source: GLOB.SES.csv."
    ),
    x = "",
    y = "Socioeconomic Status Score",
    fill = "Country"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    # The rank axis carries no information of its own; hide its ticks/text.
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(year) +
  ease_aes("cubic-in-out")

animate(animation2, nframes = 100, fps = 10)
# This animation shows the changes in the top countries' socioeconomic status scores; between 1990 and 2010 the battle for the top spot is consistently between the US and Norway.