Visualization with ggplot2

## 1.1: Load the required packages
library(gapminder)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

## 1.2: Look at the gapminder dataset

print(gapminder)
## # A tibble: 1,704 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ℹ 1,694 more rows
## 1.3: Create a subset of the gapminder data set.
## gapminder_1957 keeps only the rows whose year is 1957.
gapminder_1957 <- filter(gapminder, year == 1957)
## Scatter plot of the 1957 data: pop on the x-axis, lifeExp on the y-axis
ggplot(data = gapminder_1957, mapping = aes(x = pop, y = lifeExp)) +
  geom_point()
## Same scatter plot, now with pop on the x-axis and gdpPercap on the y-axis
ggplot(data = gapminder_1957, mapping = aes(x = pop, y = gdpPercap)) +
  geom_point()
## Scatter plot with gdpPercap on the x-axis and lifeExp on the y-axis
ggplot(data = gapminder_1957, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
## Scatter plot of pop (x) against lifeExp (y), with the x-axis drawn on a
## log10 scale
ggplot(data = gapminder_1957, mapping = aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
## Scatter plot comparing pop (x) and gdpPercap (y); both axes use a
## log10 scale
ggplot(data = gapminder_1957, mapping = aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()
## Scatter plot of pop (log10 x-axis) against lifeExp, with the point color
## representing continent
ggplot(
  data = gapminder_1957,
  mapping = aes(x = pop, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()
## Same plot with the size aesthetic added: point size represents a
## country's gdpPercap
ggplot(
  data = gapminder_1957,
  mapping = aes(x = pop, y = lifeExp, color = continent, size = gdpPercap)
) +
  geom_point() +
  scale_x_log10()
## Scatter plot comparing pop (log10 x-axis) and lifeExp, drawn as one panel
## per continent
ggplot(data = gapminder_1957, mapping = aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~continent)
## Scatter plot of the full gapminder data comparing gdpPercap (log10 x-axis)
## and lifeExp: color represents continent, size represents population, and
## each year gets its own panel
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~year)
## by_year: one row per year holding the median life expectancy
## (medianLifeExp) across all countries in that year
by_year <- summarise(
  group_by(gapminder, year),
  medianLifeExp = median(lifeExp)
)
## Scatter plot showing the change in medianLifeExp over time;
## expand_limits() makes sure the y-axis includes 0
ggplot(data = by_year, mapping = aes(x = year, y = medianLifeExp)) +
  geom_point() +
  expand_limits(y = 0)
##  Summarize medianGdpPercap within each continent within each year:
## by_year_continent
## `.groups = "drop"` returns an ungrouped result, so no grouping by year
## lingers on by_year_continent and summarise() no longer emits its
## "grouped output" message.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(medianGdpPercap = median(gdpPercap), .groups = "drop")
## Scatter plot of medianGdpPercap over time, one color per continent;
## expand_limits() makes sure the y-axis includes 0
ggplot(
  data = by_year_continent,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)
## by_continent_2007: for the year 2007, one row per continent with the
## median life expectancy and the median GDP per capita
by_continent_2007 <- summarise(
  group_by(filter(gapminder, year == 2007), continent),
  medianLifeExp = median(lifeExp),
  medianGdpPercap = median(gdpPercap)
)
## Scatter plot comparing the 2007 medians: medianLifeExp on the x-axis,
## medianGdpPercap on the y-axis, colored by continent
ggplot(
  data = by_continent_2007,
  mapping = aes(x = medianLifeExp, y = medianGdpPercap, color = continent)
) +
  geom_point()
## Re-create by_year, this time holding the median gdpPercap
## (medianGdpPercap) for each year
by_year <- summarise(
  group_by(gapminder, year),
  medianGdpPercap = median(gdpPercap)
)
## Line plot showing the change in medianGdpPercap over time;
## expand_limits() makes sure the y-axis includes 0
ggplot(data = by_year, mapping = aes(x = year, y = medianGdpPercap)) +
  geom_line() +
  expand_limits(y = 0)
## Summarize the median gdpPercap by year & continent,
## save as by_year_continent
## `.groups = "drop"` returns an ungrouped result, so no grouping by year
## lingers on by_year_continent and summarise() no longer emits its
## "grouped output" message.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(medianGdpPercap = median(gdpPercap), .groups = "drop")
## Line plot of medianGdpPercap over time with one colored line per
## continent; expand_limits() makes sure the y-axis includes 0
ggplot(
  data = by_year_continent,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_line() +
  expand_limits(y = 0)
## by_continent: median gdpPercap per continent, using 1957 rows only
by_continent <- summarise(
  group_by(filter(gapminder, year == 1957), continent),
  medianGdpPercap = median(gdpPercap)
)
##  Create a bar plot showing medianGdpPercap by continent
## (the stray trailing comma that left an empty argument in aes() is removed)
ggplot(by_continent, aes(x = continent, y = medianGdpPercap)) +
  geom_col() +
  expand_limits(y = 0)
## Visualizing GDP per capita by country in Oceania
## Filter for observations in the Oceania continent in 1957
oceania_1957 <- gapminder %>%
  filter(continent == "Oceania", year == 1957)
## Bar plot of gdpPercap by country, so this section actually produces the
## visualization its heading describes
ggplot(oceania_1957, aes(x = country, y = gdpPercap)) +
  geom_col()
## Rebuild gapminder_1957 from the 1957 rows and add a pop_by_mil column:
## the population divided by one million
gapminder_1957 <- mutate(
  filter(gapminder, year == 1957),
  pop_by_mil = pop / 1e6
)
## Histogram of population in millions (pop_by_mil), using 50 bins
ggplot(data = gapminder_1957, mapping = aes(x = pop_by_mil)) +
  geom_histogram(bins = 50)
## Recreate gapminder_1957 as the plain 1957 subset (this drops the
## pop_by_mil column added earlier)
gapminder_1957 <- filter(gapminder, year == 1957)
## Create a histogram of population (pop), with x on a log scale
## bins = 30 is the value stat_bin() was silently defaulting to; stating it
## explicitly keeps the same plot and avoids the "Pick better value" message.
ggplot(gapminder_1957, aes(x = pop)) +
  geom_histogram(bins = 30) +
  scale_x_log10()
## gapminder_1957: the gapminder rows for the year 1957 only
gapminder_1957 <- filter(gapminder, year == 1957)
## Boxplot comparing gdpPercap among continents in 1957, with the y-axis on
## a log10 scale
ggplot(data = gapminder_1957, mapping = aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10()
##  Add a title to this graph:
## "Comparing GDP per capita across continents"
## The title text now matches the requested wording ("capita", not
## "capital"; lower-case "continents").
ggplot(gapminder_1957, aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10() +
  ggtitle("Comparing GDP per capita across continents")