Input: a data frame
Output: a data frame in which every missing value of a numeric variable is replaced by that variable's mean.
Hint: Similar function
library(tidyverse)
# Load titanic.csv and drop its Cabin column in a single chain.
df <- read_csv('titanic.csv') %>%
  select(-Cabin)
# insurance.csv has no missing values; it is only used to try out the
# graphing functions.
insurance <- read_csv('insurance.csv')
# Replace the missing values of a numeric vector with the mean of its
# observed values.
#
# x: a vector, typically one column of a data frame.
#
# Returns x unchanged when it is not numeric, has no missing values, or has
# no observed values to average. Otherwise returns x with every NA replaced
# by mean(x, na.rm = TRUE). An integer vector becomes double when the mean is
# filled in, because the mean is generally not a whole number.
impute_mean <- function(x) {
  if (!is.numeric(x)) {
    return(x)
  }
  missing <- is.na(x)
  # Nothing to fill, or nothing to compute a mean from.
  if (!any(missing) || all(missing)) {
    return(x)
  }
  # Base assignment lets an integer vector widen to double; a strict cast of
  # the mean back to integer would fail for fractional means.
  x[missing] <- mean(x, na.rm = TRUE)
  x
}
# Apply impute_mean() to every column of a data frame.
#
# x: a data frame (or any list of column vectors).
#
# Returns x with each column replaced by impute_mean(column). A data frame
# with zero columns is returned unchanged.
df_impute <- function(x) {
  # seq_along() is empty for zero columns, whereas 1:length(x) would count
  # down from 1 to 0 and index out of bounds.
  for (i in seq_along(x)) {
    x[[i]] <- impute_mean(x[[i]])
  }
  x
}
# Missing-value count per column before imputation.
colSums(is.na(df))
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0           0           0           0           0         177 
##       SibSp       Parch      Ticket        Fare    Embarked 
##           0           0           0           0           2
# Mean-impute the numeric columns, then count the missing values again.
df1 <- df_impute(df)
colSums(is.na(df1))
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0           0           0           0           0           0 
##       SibSp       Parch      Ticket        Fare    Embarked 
##           0           0           0           0           2
Input: a data frame
Output: a data frame in which every missing value is replaced by the variable's mean (for numeric variables) or its mode (for non-numeric variables).
Hint: Combine the function in Problem 1 and the function in this example
# Replace the missing values of a vector with its mean (numeric input) or
# its most frequent value (any other input).
#
# x: a vector, typically one column of a data frame.
#
# Returns x unchanged when it has no missing values or no observed values.
# Otherwise every NA is replaced by
#   - mean(x, na.rm = TRUE) when x is numeric (an integer vector becomes
#     double), or
#   - the most frequent observed value otherwise. Ties go to the value that
#     comes first in table() order. The fill value is taken from x itself,
#     so the type of x is preserved.
impute_all <- function(x) {
  missing <- is.na(x)
  observed <- x[!missing]
  if (!any(missing) || length(observed) == 0) {
    return(x)
  }

  if (is.numeric(x)) {
    x[missing] <- mean(x, na.rm = TRUE)
  } else {
    counts <- table(observed)
    mode_label <- names(counts)[which.max(counts)]
    # table() only gives the label as text; look up an original element with
    # that label so the fill value has the same type as x.
    x[missing] <- observed[match(mode_label, as.character(observed))]
  }
  x
}
# Apply impute_all() to every column of a data frame.
#
# x: a data frame (or any list of column vectors).
#
# Returns x with each column replaced by impute_all(column). A data frame
# with zero columns is returned unchanged.
df_impute_all <- function(x) {
  # seq_along() is empty for zero columns, whereas 1:length(x) would count
  # down from 1 to 0 and index out of bounds.
  for (i in seq_along(x)) {
    x[[i]] <- impute_all(x[[i]])
  }
  x
}
# Missing-value count per column before imputation.
colSums(is.na(df))
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0           0           0           0           0         177 
##       SibSp       Parch      Ticket        Fare    Embarked 
##           0           0           0           0           2
# Impute numeric columns by mean and the others by mode, then count again.
df2 <- df_impute_all(df)
colSums(is.na(df2))
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0           0           0           0           0           0 
##       SibSp       Parch      Ticket        Fare    Embarked 
##           0           0           0           0           0
Input: a data frame
Output: Bar plots of all non-numeric variables
Hint: Similar function
# Print one bar plot for every non-numeric column of a data frame.
#
# d: a data frame.
#
# Each plot is printed as a side effect, with the column name as the x-axis
# label. Returns NULL invisibly. Nothing is drawn when d has no columns or
# only numeric ones.
bar_plot <- function(d) {
  col_names <- names(d)
  # seq_along() is empty for a zero-column data frame; 1:length(d) is not.
  for (i in seq_along(d)) {
    if (!is.numeric(d[[i]])) {
      col <- col_names[i]
      # Refer to the column through the .data pronoun rather than d[[i]],
      # which ggplot2 discourages inside aes().
      print(
        ggplot2::ggplot(d, ggplot2::aes(x = .data[[col]])) +
          ggplot2::geom_bar() +
          ggplot2::labs(x = col)
      )
    }
  }
  invisible(NULL)
}
# Convert the Pclass and Survived codes to factors so that bar_plot(), which
# skips numeric columns, draws them as well.
df2$Pclass <- factor(df2$Pclass)
df2$Survived <- factor(df2$Survived)
bar_plot(df2)
bar_plot(insurance)
Input: a data frame
Output: all possible bar plots of a non-numeric variable filled by another non-numeric variable.
Hint: Similar function
# Print a filled bar plot for every pair of non-numeric columns.
#
# d: a data frame.
#
# For each pair of columns (i, j) with i before j, both non-numeric, a bar
# plot of column i filled by column j is printed as a side effect. Returns
# NULL invisibly. Nothing is drawn when d has fewer than two columns.
bar_plot2 <- function(d) {
  col_names <- names(d)
  n_cols <- length(d)
  # seq_len() keeps the outer loop empty for 0 or 1 columns; 1:(n - 1) would
  # count downwards and index out of bounds.
  for (i in seq_len(max(n_cols - 1L, 0L))) {
    for (j in (i + 1L):n_cols) {
      if (!is.numeric(d[[i]]) && !is.numeric(d[[j]])) {
        x_col <- col_names[i]
        fill_col <- col_names[j]
        # .data[[...]] is the form ggplot2 recommends over d[[i]] in aes().
        print(
          ggplot2::ggplot(
            d,
            ggplot2::aes(x = .data[[x_col]], fill = .data[[fill_col]])
          ) +
            ggplot2::geom_bar() +
            ggplot2::labs(x = x_col, fill = fill_col)
        )
      }
    }
  }
  invisible(NULL)
}
bar_plot2(df)
bar_plot2(insurance)
Input: a data frame
Output:
all possible bar plots of a non-numeric variable filled by another non-numeric variable.
all possible density plots of a numeric variable colored by a non-numeric variable.
all possible scatter plots.
Hint: Combine this function, this function, and the function in Question 4. One way to combine them is to create a new function, quick_plot, and call these three functions within quick_plot.
# Print a scatter plot for every pair of numeric columns.
#
# d: a data frame.
#
# For each pair of columns (i, j) with i before j, both numeric, a scatter
# plot of column j against column i is printed as a side effect. Returns
# NULL invisibly. Nothing is drawn when d has fewer than two columns.
scatter_plot <- function(d) {
  col_names <- names(d)
  n_cols <- length(d)
  # seq_len() keeps the outer loop empty for 0 or 1 columns; 1:(n - 1) would
  # count downwards and index out of bounds.
  for (i in seq_len(max(n_cols - 1L, 0L))) {
    for (j in (i + 1L):n_cols) {
      if (is.numeric(d[[i]]) && is.numeric(d[[j]])) {
        x_col <- col_names[i]
        y_col <- col_names[j]
        # .data[[...]] is the form ggplot2 recommends over d[[i]] in aes().
        print(
          ggplot2::ggplot(
            d,
            ggplot2::aes(x = .data[[x_col]], y = .data[[y_col]])
          ) +
            ggplot2::geom_point() +
            ggplot2::labs(x = x_col, y = y_col)
        )
      }
    }
  }
  invisible(NULL)
}
# Print a density plot for every (numeric, non-numeric) pair of columns.
#
# d: a data frame.
#
# For each numeric column and each non-numeric column, a density plot of the
# numeric column colored by the non-numeric one is printed as a side effect.
# Pairs are formed regardless of column order, so a numeric column that
# comes after a categorical column is plotted too. Returns NULL invisibly.
# Nothing is drawn when d lacks either kind of column.
density_plot2 <- function(d) {
  col_names <- names(d)
  is_num <- vapply(
    seq_along(d),
    function(k) is.numeric(d[[k]]),
    logical(1)
  )
  # which() yields empty index vectors for missing kinds, so both loops are
  # safely skipped for empty or single-kind data frames.
  for (i in which(is_num)) {
    for (j in which(!is_num)) {
      x_col <- col_names[i]
      color_col <- col_names[j]
      # .data[[...]] is the form ggplot2 recommends over d[[i]] in aes().
      print(
        ggplot2::ggplot(
          d,
          ggplot2::aes(x = .data[[x_col]], color = .data[[color_col]])
        ) +
          ggplot2::geom_density() +
          ggplot2::labs(x = x_col, color = color_col)
      )
    }
  }
  invisible(NULL)
}
# Draw every plot family for a data frame in one call: scatter plots first,
# then density plots, then filled bar plots.
#
# d: a data frame, passed unchanged to each plotting function.
#
# Returns whatever bar_plot2(d) returns.
quick_plot <- function(d) {
  for (draw in list(scatter_plot, density_plot2)) {
    draw(d)
  }
  bar_plot2(d)
}
# Run the scatter, density and filled-bar plotting functions on the
# insurance data.
quick_plot(insurance)