Questions

Use data of your own. Produce the following types of plots and comment on each plot. Plots should be meaningful. If you use the data we used in class, make sure the plots are not the same as the ones in the slides. All plots should have a title, a caption, and appropriate labels on the x- and y-axes.


  1. Use the WHO’s dataset at this link. Make a top-10 bar race by months between countries on the number of deaths by Covid 19.
library(tidyverse)
library(gganimate)
library(knitr)
library(gifski)
library(lubridate)

# Question 1: top-10 bar race of COVID-19 deaths by country, one state per
# month, built from the WHO global daily table.

# Load the data, drop the country code column (not used below), and derive the
# month number of every report date.
df <- read_csv("https://covid19.who.int/WHO-COVID-19-global-data.csv") %>%
  select(-any_of("Country_code")) %>%
  rename(date = Date_reported) %>%
  mutate(
    date = ymd(date),
    # NOTE(review): month() keeps only the month number, so rows from the same
    # calendar month of different years are pooled together if the download
    # spans more than one year -- confirm this is intended.
    month = month(date)
  )

# Mean of the reported new deaths for each country within each month.
# na.rm = TRUE keeps a single missing report from turning the whole
# country-month into NA.
df1 <- df %>%
  group_by(Country, month) %>%
  summarise(mean = mean(New_deaths, na.rm = TRUE), .groups = "drop")

# Rank countries inside each month and keep the ten largest means.
# ties.method = "first" gives every country its own integer rank; the default
# ("average") hands tied countries the same or a fractional rank, which makes
# bars overlap and lets the filter keep more or fewer than ten rows.
df2 <- df1 %>%
  group_by(month) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  filter(rank <= 10) %>%
  ungroup()

# Horizontal bars positioned by rank (rank 1 on top), labelled with the
# country name, animated across the month states.
animation <- df2 %>%
  ggplot(
    aes(x = rank, y = mean, group = Country, fill = Country, label = Country)
  ) +
  geom_col() +
  geom_text(aes(y = mean, label = Country), hjust = 1.4) +
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Month {closest_state}",
    x = "",
    y = "Number of Deaths",
    fill = "Country"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(month) +
  ease_aes("cubic-in-out")

animate(animation, nframes = 400, fps = 10)

# The US dominated the average number of deaths per month pretty early on, but as of right now, India has the highest average number of deaths.
  1. Make another bar race using that dataset.
# Question 2: bar race of new COVID-19 cases by WHO region, one state per
# month. Uses the `df` table (with its `month` column) prepared for the first
# bar race.

# Mean of the reported new cases for each region within each month.
# na.rm = TRUE keeps a single missing report from turning the whole
# region-month into NA.
df3 <- df %>%
  group_by(WHO_region, month) %>%
  summarise(mean = mean(New_cases, na.rm = TRUE), .groups = "drop")

# Rank regions inside each month and keep the five largest means.
# ties.method = "first" guarantees distinct integer ranks, so tied regions
# never share a bar position and the filter keeps at most five rows.
df4 <- df3 %>%
  group_by(month) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  filter(rank <= 5) %>%
  ungroup()

# Horizontal bars positioned by rank (rank 1 on top), labelled with the
# region code, animated across the month states.
animation1 <- df4 %>%
  ggplot(
    aes(
      x = rank, y = mean,
      group = WHO_region, fill = WHO_region, label = WHO_region
    )
  ) +
  geom_col() +
  geom_text(aes(y = mean, label = WHO_region), hjust = 1.4) +
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Month {closest_state}",
    x = "",
    y = "Number of Cases",
    fill = "Region"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(month) +
  ease_aes("cubic-in-out")

animate(animation1, nframes = 400, fps = 10)

# This bar race animation looks at the average number of cases per month by region. SEARO had the largest average number of cases by the end of September.
  1. Make a bar race using a dataset of your own interest.
# Question 3: bar race of socioeconomic status (SES) score by country, one
# state per year, from the local GLOB.SES.csv file.
df5 <- read_csv("GLOB.SES.csv")

# Mean SES score for each country and year, from 1990 onwards.
# na.rm = TRUE keeps a missing score from turning a country-year into NA.
df6 <- df5 %>%
  filter(year >= 1990) %>%
  group_by(country, year) %>%
  summarise(mean = mean(SES, na.rm = TRUE), .groups = "drop")

# Rank countries inside each year and keep the seven highest scores.
# ties.method = "first" guarantees distinct integer ranks, so tied countries
# never share a bar position and the filter keeps at most seven rows.
df7 <- df6 %>%
  group_by(year) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  filter(rank <= 7) %>%
  ungroup()

# Horizontal bars positioned by rank (rank 1 on top), labelled with the
# country name, animated across the year states.
animation2 <- df7 %>%
  ggplot(
    aes(x = rank, y = mean, group = country, fill = country, label = country)
  ) +
  geom_col() +
  geom_text(aes(y = mean, label = country), hjust = 1.4) +
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Year: {closest_state}",
    x = "",
    y = "Socioeconomic Status Score",
    fill = "Country"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(year) +
  ease_aes("cubic-in-out")

animate(animation2, nframes = 100, fps = 10)

# This animation shows the changes in the top countries' socioeconomic status scores; the battle for the top spot between 1990 and 2010 is consistently between the US and Norway.