Research question

How has the US gender wage gap changed over time for different occupations and age groups?

  • The US gender wage gap may be affecting you!

Data Sources

# Read the three TidyTuesday (2019-03-05) CSV files straight from GitHub
jobs_gender <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
earnings_female <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/earnings_female.csv"
)
employed_gender <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/employed_gender.csv"
)

Data Dictionary for employed_gender, earnings_female, and jobs_gender is in the Appendix


Employment Rate

# Share of employed women and men who usually work full time, per year.
# Reshaped to long format: one row per year and gender, value in `fullTime`.
employedfulltime_gender <- employed_gender %>%
  select(year, Female = full_time_female, Male = full_time_male) %>%
  pivot_longer(
    cols = c(Female, Male),
    names_to = "gender",
    values_to = "fullTime"
  )

# Line chart of the full-time share by gender with a dashed linear trend,
# plus annotations marking the male-female difference in 1968 and in 2016.
employedfulltime_gender %>%
  ggplot(aes(x = year, y = fullTime, color = gender)) +
  geom_line(size = .5) +
  # Label each line directly at its final year instead of using a legend
  geom_text_repel(
    data = employedfulltime_gender %>%
      filter(year == max(year)),
    aes(label = gender),
    hjust = 0, nudge_x = 1, direction = "y", size = 4.5, segment.color = NA
  ) +
  geom_point(size = .5) +
  geom_smooth(se = FALSE, linetype = "dashed", method = "lm") +
  # Straight arrows pointing from each difference label towards the male
  # (upwards) and female (downwards) lines; one vectorised layer replaces
  # four zero-curvature geom_curve() layers
  annotate(
    "segment",
    x = c(1969, 1969, 2017, 2017),
    xend = c(1969, 1969, 2017, 2017),
    y = c(85, 83.5, 82.5, 81),
    yend = c(91.5, 76, 87, 76),
    color = "black", size = 0.5,
    arrow = arrow(length = unit(0.01, "npc"), type = "closed")
  ) +
  annotate(
    geom = "text", x = 1975, y = 84,
    label = "Difference of 17.1% in 1968", size = 3, color = "black"
  ) +
  annotate(
    geom = "text", x = 2017, y = 82,
    label = "Difference of 12.5% in 2016", size = 3, color = "black"
  ) +
  # Extra room on the right of the x axis for the direct line labels
  scale_x_continuous(
    breaks = seq(1968, 2016, 4),
    expand = expansion(add = c(0, 11))
  ) +
  # A plot can hold only one y scale: a second scale_y_continuous() call
  # replaces the first, so breaks and labels must be set together
  scale_y_continuous(
    breaks = seq(0, 100, 2),
    labels = scales::percent_format(scale = 1, accuracy = 1)
  ) +
  scale_color_manual(
    values = c(Female = "#D95F02", Male = "#1B9E77")
  ) +
  theme_half_open(font_size = 11) +
  theme(legend.position = "none") +
  labs(
    x = "Year",
    y = "Percent Employed Full-time by Gender ",
    title = "Trends of Genders Working Full Time"
  )

# calculate the difference in 1968
# male = 92.2 female = 75.1 difference = 17.1

# calculate the difference in 2016
# male = 87.6 female = 75.1, difference = 12.5

After analyzing this chart, we decided to look into whether occupation or age group has a larger effect on the gender wage gap.


Wage Gap By Age Group

Not only is there an evident difference between the percentages of each gender working full time, there is also a gender difference in salary. The following chart, “Age Trends of Female Salary as a Percentage of Male Salary”, shows female salaries as a percentage of male salaries. It shows that, for all ages combined, female salary as a percentage of male salary has increased since 1979: it was 62.3% in 1979 and 80.9% in 2011. While this is an increase of 18.6 percentage points, women are still not paid as much as men. As of 2011, there was still a 19.1% gap.

# Split the earnings series so the all-ages total can be emphasised:
# the individual age groups are drawn faded, the total fully opaque.
agegroupearnings <- earnings_female %>%
  filter(group != "Total, 16 years and older")
totalearnings <- earnings_female %>%
  filter(group == "Total, 16 years and older")

# Female salary as a percentage of male salary over time, one line per group
ggplot() +
  geom_line(
    data = agegroupearnings,
    aes(x = Year, y = percent, color = group),
    alpha = 0.45, size = .5
  ) +
  # alpha must lie in [0, 1]; 1 draws the total line fully opaque
  geom_line(
    data = totalearnings,
    aes(x = Year, y = percent, color = group),
    alpha = 1, size = .7
  ) +
  # Label every line directly at its final year instead of using a legend
  geom_text_repel(
    data = earnings_female %>%
      filter(Year == max(Year)),
    aes(x = Year, y = percent, label = group, color = group),
    hjust = 0, nudge_x = 1, direction = "y", size = 4.3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct line labels
  scale_x_continuous(
    breaks = seq(1980, 2011, 5),
    expand = expansion(add = c(0, 15.5))
  ) +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  scale_color_brewer(palette = "Dark2") +
  theme_half_open(font_size = 11) +
  theme(legend.position = "none") +
  labs(
    x = "Year",
    y = "Female salary percent of male salary",
    title = "Age Trends of Female Salary as a Percentage of Male Salary"
  )

To get a closer look at salary distribution, an analysis of the top 10 highest-paying jobs may help dispel the assumption that the wage gap exists simply because men are employed in better-earning job titles. The following graphs show that, even within the same occupation, there is still a gender wage gap.


Wage Gap By Occupation

2016

# The ten 2016 occupations with the highest male median earnings, in long
# format: one row per occupation and gender, with `income` for that gender
# and `wage_dif` (male minus female earnings) repeated on both rows.
jobsDumbellData2016 <- jobs_gender %>%
  filter(year == 2016) %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
  arrange(desc(total_earnings_male)) %>%
  slice(1:10) %>%
  pivot_longer(
    cols = c(total_earnings_male, total_earnings_female),
    names_to = "gender_earnings",
    values_to = "income"
  ) %>%
  mutate(
    gender = if_else(
      gender_earnings == "total_earnings_male", "Male", "Female"
    ),
    # Order the occupations by male income (the value at the last `gender`)
    occupation = fct_reorder2(occupation, gender, desc(income)),
    gender = as.factor(gender)
  )

# Occupation list reused by the 2013-2015 charts and the wage-gap line chart.
# rev() makes the first listed occupation the last factor level, which
# ggplot2 draws at the top of a discrete y axis.
# NOTE(review): presumably this matches the ten occupations selected above,
# in the same order — verify against jobsDumbellData2016.
top10Jobs2016 <- rev(c("Physicians and surgeons",
                   "Nurse anesthetists",
                   "Dentists",
                   "Petroleum engineers",
                   "Chief executives",
                   "Podiatrists",
                   "Lawyers",
                   "Mathematicians",
                   "Architectural and engineering managers",
                   "Optometrists"))

# Dumbbell chart: one grey bar per occupation joining the female and male
# median earnings, with the wage difference printed in a grey side panel.
jobsDumbell2016 <- ggplot(jobsDumbellData2016,
                          aes(x = income, y = occupation)) +
  geom_line(aes(group = occupation), color = "grey", size = 1) +
  geom_point(aes(color = gender), size = 2.5, show.legend = FALSE) +
  # Side panel drawn once (the data has two rows per occupation, so a
  # data-driven geom_rect() would stack 20 identical rectangles)
  annotate(
    "rect",
    xmin = 250000, xmax = 300000, ymin = -Inf, ymax = Inf, fill = "grey"
  ) +
  # One wage-difference label per occupation, not one per gender row
  geom_text(
    data = distinct(jobsDumbellData2016, occupation, wage_dif),
    aes(x = 275000, label = scales::dollar(wage_dif)),
    color = "black", fontface = "bold", size = 3
  ) +
  # Column header for the side panel, placed above the top row
  geom_text(
    data = jobsDumbellData2016 %>%
      filter(occupation == "Physicians and surgeons") %>%
      distinct(occupation),
    aes(x = 275000, label = "Difference"),
    color = "black", size = 3, vjust = -2, fontface = "bold"
  ) +
  # "Male" / "Female" captions above the top dumbbell, positioned and
  # coloured from the data rather than hard-coded incomes
  geom_text(
    data = filter(jobsDumbellData2016,
                  occupation == "Physicians and surgeons"),
    aes(label = gender, color = gender),
    size = 3, vjust = -1.5, fontface = "bold", show.legend = FALSE
  ) +
  scale_x_continuous(
    expand = expansion(mult = c(.015, 0.05)),
    limits = c(50000, 300000),
    labels = scales::dollar,
    breaks = seq(50000, 250000, 50000)
  ) +
  scale_color_manual(
    values = c(Female = "#D95F02", Male = "#1B9E77")
  ) +
  # Extra vertical padding so the captions above the top row fit
  scale_y_discrete(expand = c(0.12, 0)) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  labs(
    x = "Estimated Median Earnings (US$)",
    y = "Occupation",
    color = "Gender",
    title = "Wage Gap for the 10 Highest Paid Jobs",
    subtitle = "From year 2016"
  )

plot(jobsDumbell2016)

2015

# 2015 earnings for the occupations listed in top10Jobs2016, in long format:
# one row per occupation and gender, with `income` for that gender and
# `wage_dif` (male minus female earnings) repeated on both rows.
jobsDumbellData2015 <- jobs_gender %>%
  filter(year == 2015) %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
  filter(occupation %in% top10Jobs2016) %>%
  pivot_longer(
    cols = c(total_earnings_male, total_earnings_female),
    names_to = "gender_earnings",
    values_to = "income"
  ) %>%
  mutate(
    gender = as.factor(if_else(
      gender_earnings == "total_earnings_male", "Male", "Female"
    )),
    # Put the occupations in the order given by top10Jobs2016
    occupation = fct_relevel(occupation, top10Jobs2016)
  )

# Dumbbell chart: one grey bar per occupation joining the female and male
# median earnings, with the wage difference printed in a grey side panel.
jobsDumbell2015 <- ggplot(jobsDumbellData2015,
                          aes(x = income, y = occupation)) +
  geom_line(aes(group = occupation), color = "grey", size = 1) +
  geom_point(aes(color = gender), size = 2.5, show.legend = FALSE) +
  # Side panel drawn once (the data has two rows per occupation, so a
  # data-driven geom_rect() would stack 20 identical rectangles)
  annotate(
    "rect",
    xmin = 250000, xmax = 300000, ymin = -Inf, ymax = Inf, fill = "grey"
  ) +
  # One wage-difference label per occupation, not one per gender row
  geom_text(
    data = distinct(jobsDumbellData2015, occupation, wage_dif),
    aes(x = 275000, label = scales::dollar(wage_dif)),
    color = "black", fontface = "bold", size = 3
  ) +
  # Column header for the side panel, placed above the top row
  geom_text(
    data = jobsDumbellData2015 %>%
      filter(occupation == "Physicians and surgeons") %>%
      distinct(occupation),
    aes(x = 275000, label = "Difference"),
    color = "black", size = 3, vjust = -2, fontface = "bold"
  ) +
  # "Male" / "Female" captions above the top dumbbell, positioned and
  # coloured from the data rather than hard-coded incomes
  geom_text(
    data = filter(jobsDumbellData2015,
                  occupation == "Physicians and surgeons"),
    aes(label = gender, color = gender),
    size = 3, vjust = -1.5, fontface = "bold", show.legend = FALSE
  ) +
  scale_x_continuous(
    expand = expansion(mult = c(.015, 0.05)),
    limits = c(50000, 300000),
    labels = scales::dollar,
    breaks = seq(50000, 250000, 50000)
  ) +
  scale_color_manual(
    values = c(Female = "#D95F02", Male = "#1B9E77")
  ) +
  # Extra vertical padding so the captions above the top row fit
  scale_y_discrete(expand = c(0.12, 0)) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  labs(
    x = "Estimated Median Earnings (US$)",
    y = "Occupation",
    color = "Gender",
    title = "Wage Gap for the 10 Highest Paid Jobs",
    subtitle = "From year 2015"
  )

plot(jobsDumbell2015)

2014

# 2014 earnings for the occupations listed in top10Jobs2016, in long format:
# one row per occupation and gender, with `income` for that gender and
# `wage_dif` (male minus female earnings) repeated on both rows.
jobsDumbellData2014 <- jobs_gender %>%
  filter(year == 2014) %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
  filter(occupation %in% top10Jobs2016) %>%
  pivot_longer(
    cols = c(total_earnings_male, total_earnings_female),
    names_to = "gender_earnings",
    values_to = "income"
  ) %>%
  mutate(
    gender = as.factor(if_else(
      gender_earnings == "total_earnings_male", "Male", "Female"
    )),
    # Put the occupations in the order given by top10Jobs2016
    occupation = fct_relevel(occupation, top10Jobs2016)
  )

# Dumbbell chart: one grey bar per occupation joining the female and male
# median earnings, with the wage difference printed in a grey side panel.
jobsDumbell2014 <- ggplot(jobsDumbellData2014,
                          aes(x = income, y = occupation)) +
  geom_line(aes(group = occupation), color = "grey", size = 1) +
  geom_point(aes(color = gender), size = 2.5, show.legend = FALSE) +
  # Side panel drawn once (the data has two rows per occupation, so a
  # data-driven geom_rect() would stack 20 identical rectangles)
  annotate(
    "rect",
    xmin = 250000, xmax = 300000, ymin = -Inf, ymax = Inf, fill = "grey"
  ) +
  # One wage-difference label per occupation, not one per gender row
  geom_text(
    data = distinct(jobsDumbellData2014, occupation, wage_dif),
    aes(x = 275000, label = scales::dollar(wage_dif)),
    color = "black", fontface = "bold", size = 3
  ) +
  # Column header for the side panel, placed above the top row
  geom_text(
    data = jobsDumbellData2014 %>%
      filter(occupation == "Physicians and surgeons") %>%
      distinct(occupation),
    aes(x = 275000, label = "Difference"),
    color = "black", size = 3, vjust = -2, fontface = "bold"
  ) +
  # "Male" / "Female" captions above the top dumbbell, positioned and
  # coloured from the data rather than hard-coded incomes
  geom_text(
    data = filter(jobsDumbellData2014,
                  occupation == "Physicians and surgeons"),
    aes(label = gender, color = gender),
    size = 3, vjust = -1.5, fontface = "bold", show.legend = FALSE
  ) +
  scale_x_continuous(
    expand = expansion(mult = c(.015, 0.05)),
    limits = c(50000, 300000),
    labels = scales::dollar,
    breaks = seq(50000, 250000, 50000)
  ) +
  scale_color_manual(
    values = c(Female = "#D95F02", Male = "#1B9E77")
  ) +
  # Extra vertical padding so the captions above the top row fit
  scale_y_discrete(expand = c(0.12, 0)) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  labs(
    x = "Estimated Median Earnings (US$)",
    y = "Occupation",
    color = "Gender",
    title = "Wage Gap for the 10 Highest Paid Jobs",
    subtitle = "From year 2014"
  )

plot(jobsDumbell2014)

2013

# 2013 earnings for the occupations listed in top10Jobs2016, in long format:
# one row per occupation and gender, with `income` for that gender and
# `wage_dif` (male minus female earnings) repeated on both rows.
jobsDumbellData2013 <- jobs_gender %>%
  filter(year == 2013) %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
  filter(occupation %in% top10Jobs2016) %>%
  pivot_longer(
    cols = c(total_earnings_male, total_earnings_female),
    names_to = "gender_earnings",
    values_to = "income"
  ) %>%
  mutate(
    gender = as.factor(if_else(
      gender_earnings == "total_earnings_male", "Male", "Female"
    )),
    # Put the occupations in the order given by top10Jobs2016
    occupation = fct_relevel(occupation, top10Jobs2016)
  )

# Dumbbell chart: one grey bar per occupation joining the female and male
# median earnings, with the wage difference printed in a grey side panel.
jobsDumbell2013 <- ggplot(jobsDumbellData2013,
                          aes(x = income, y = occupation)) +
  geom_line(aes(group = occupation), color = "grey", size = 1) +
  geom_point(aes(color = gender), size = 2.5, show.legend = FALSE) +
  # Side panel drawn once (the data has two rows per occupation, so a
  # data-driven geom_rect() would stack 20 identical rectangles)
  annotate(
    "rect",
    xmin = 250000, xmax = 300000, ymin = -Inf, ymax = Inf, fill = "grey"
  ) +
  # One wage-difference label per occupation, not one per gender row
  geom_text(
    data = distinct(jobsDumbellData2013, occupation, wage_dif),
    aes(x = 275000, label = scales::dollar(wage_dif)),
    color = "black", fontface = "bold", size = 3
  ) +
  # Column header for the side panel, placed above the top row
  geom_text(
    data = jobsDumbellData2013 %>%
      filter(occupation == "Physicians and surgeons") %>%
      distinct(occupation),
    aes(x = 275000, label = "Difference"),
    color = "black", size = 3, vjust = -2, fontface = "bold"
  ) +
  # "Male" / "Female" captions above the top dumbbell, positioned and
  # coloured from the data rather than hard-coded incomes
  geom_text(
    data = filter(jobsDumbellData2013,
                  occupation == "Physicians and surgeons"),
    aes(label = gender, color = gender),
    size = 3, vjust = -1.5, fontface = "bold", show.legend = FALSE
  ) +
  scale_x_continuous(
    expand = expansion(mult = c(.015, 0.05)),
    limits = c(50000, 300000),
    labels = scales::dollar,
    breaks = seq(50000, 250000, 50000)
  ) +
  scale_color_manual(
    values = c(Female = "#D95F02", Male = "#1B9E77")
  ) +
  # Extra vertical padding so the captions above the top row fit
  scale_y_discrete(expand = c(0.12, 0)) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  labs(
    x = "Estimated Median Earnings (US$)",
    y = "Occupation",
    color = "Gender",
    title = "Wage Gap for the 10 Highest Paid Jobs",
    subtitle = "From year 2013"
  )

plot(jobsDumbell2013)

Click the different tabs to see the plot for different years
  • After plotting the graph for 2016, we can see that all of the top 10 jobs, except for “Architectural and engineering managers” (which has a wage gap of 45 US Dollars), have a considerable gap between wages. The top paid job, “Physicians and surgeons”, also has the highest wage gap, at over US$ 65,000.

  • By clicking on the tabs to see the plots for all four years, we can see that there have been variations in income by occupation over the last four years; however, on average, male workers in all of these occupations have consistently had a higher income than female workers. The only instance of females having a higher income occurs in 2015 for “Architectural and engineering managers”.

To have a clearer view of how the wage gap changed over time for these 10 occupations, we decided to plot the following line chart, “Wage Gap Change Over The Years For The Top 10 Highest Paid Jobs:”

# Male-minus-female median earnings per year for the occupations listed in
# top10Jobs2016 (all years present in jobs_gender are kept).
occupationsGapData <- jobs_gender %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(year, occupation, wage_dif) %>%
  filter(occupation %in% top10Jobs2016)

# One line per occupation showing how its wage gap changes over the years
ggplot() +
  geom_line(
    data = occupationsGapData,
    aes(x = year, y = wage_dif, color = occupation),
    alpha = 0.75, size = 0.75
  ) +
  # Label every line directly at its final year instead of using a legend
  geom_text_repel(
    data = occupationsGapData %>%
      filter(year == max(year)),
    aes(x = year, y = wage_dif, label = occupation, color = occupation),
    hjust = 0, nudge_x = 1, direction = "y", size = 3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct line labels
  scale_x_continuous(
    breaks = seq(2013, 2016, 1),
    expand = expansion(add = c(0, .85))
  ) +
  scale_y_continuous(labels = scales::dollar_format(scale = 1)) +
  # The layers map `color` (not `fill`) to a discrete variable, so the
  # viridis palette has to be applied with the discrete colour scale;
  # scale_fill_viridis() had no effect on this plot
  scale_color_viridis(discrete = TRUE) +
  theme_half_open(font_size = 11) +
  theme(legend.position = "none") +
  labs(
    x = "Year",
    y = "Wage Difference from Male to Female",
    title = "Wage Gap Change Over The Years For The Top 10 Highest Paid Jobs",
    caption = "A negative value corresponds to the female salary being greater than the male salary."
  )

  • This plot shows that there is no clear increase or decrease in the gap overall. However, we can see that the gap has substantially widened for Mathematicians, suggesting that the gap in this occupation may be getting worse.

  • We can also see that most of the wage gaps lie from around 25,000 US Dollars to 65,000 US Dollars, which shows that there is still a long way to go in order to achieve wage equality.


In conclusion, the US wage gap has been a problem for many years. While there have been conversations suggesting that the salaries of men and women are no longer different, it is evident that at higher paid positions as well as in older age groups, there is a clear gap. Unfortunately, this gap does not appear to be consistently improving over time as society may have hoped. The gap is exacerbated by the oldest age groups, and it is even still prevalent within the highest paid occupations. It does not appear from the charts that the efforts to combat the problem have resulted in significant improvement. If this data reflects reality, what have the efforts to close this gap actually done?


Appendix

Below is the Data Dictionary for employed_gender, earnings_female, and jobs_gender:

Data Dictionary

jobs_gender.csv

variable class description
year integer Year
occupation character Specific job/career
major_category character Broad category of occupation
minor_category character Fine category of occupation
total_workers double Total estimated full-time workers > 16 years old
workers_male double Estimated MALE full-time workers > 16 years old
workers_female double Estimated FEMALE full-time workers > 16 years old
percent_female double The percent of females for specific occupation
total_earnings double Total estimated median earnings for full-time workers > 16 years old
total_earnings_male double Estimated MALE median earnings for full-time workers > 16 years old
total_earnings_female double Estimated FEMALE median earnings for full-time workers > 16 years old
wage_percent_of_male double Female wages as percent of male wages - NA for occupations with small sample size




earnings_female.csv

variable class description
Year integer Year
group character Age group
percent double Female salary percent of male salary


employed_gender.csv

variable class description
year double Year
total_full_time double Percent of total employed people usually working full time
total_part_time double Percent of total employed people usually working part time
full_time_female double Percent of employed women usually working full time
part_time_female double Percent of employed women usually working part time
full_time_male double Percent of employed men usually working full time
part_time_male double Percent of employed men usually working part time
# Setup chunk: attach packages and set knitr / ggplot2 defaults for the report
library(tidyverse)
library(here)
# Chunk defaults applied to every code chunk when the document is knitted:
# warnings and messages are hidden and printed output is prefixed with "#>"
knitr::opts_chunk$set(
    warning = FALSE,
    message = FALSE,
    comment = "#>",
    fig.path = "figs/", # Folder where rendered plots are saved
    fig.width = 7.252, # Default plot width
    fig.height = 4, # Default plot height
    fig.retina = 3 # For better plot resolution
)

# Additional packages used by the analysis below.
# NOTE(review): tidyverse, here and cowplot are attached more than once in
# this chunk; re-attaching an already attached package is a no-op, so the
# repeats are harmless but redundant. The attach order is left untouched
# because it determines which package masks same-named functions.
library(tidyverse)
library(here)
library(knitr)
library(readxl)
library("GGally")
library(cowplot)
library(ggplot2)
library(ggrepel)
library(cowplot)
library(viridis)
library(lubridate)
library(plotly)


# Put any other "global" settings here, e.g. a ggplot theme:
# NOTE(review): every plot visible in this file adds its own cowplot theme
# (theme_half_open / theme_minimal_vgrid), which presumably overrides this
# default — confirm whether it is still needed.
theme_set(theme_bw(base_size = 20))
# Read the three TidyTuesday (2019-03-05) CSV files straight from GitHub
jobs_gender <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
earnings_female <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/earnings_female.csv"
)
employed_gender <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/employed_gender.csv"
)

# Share of employed women and men who usually work full time, per year.
# Reshaped to long format: one row per year and gender, value in `fullTime`.
employedfulltime_gender <- employed_gender %>%
  select(year, Female = full_time_female, Male = full_time_male) %>%
  pivot_longer(
    cols = c(Female, Male),
    names_to = "gender",
    values_to = "fullTime"
  )

# Line chart of the full-time share by gender with a dashed linear trend,
# plus annotations marking the male-female difference in 1968 and in 2016.
employedfulltime_gender %>%
  ggplot(aes(x = year, y = fullTime, color = gender)) +
  geom_line(size = .5) +
  # Label each line directly at its final year instead of using a legend
  geom_text_repel(
    data = employedfulltime_gender %>%
      filter(year == max(year)),
    aes(label = gender),
    hjust = 0, nudge_x = 1, direction = "y", size = 4.5, segment.color = NA
  ) +
  geom_point(size = .5) +
  geom_smooth(se = FALSE, linetype = "dashed", method = "lm") +
  # Straight arrows pointing from each difference label towards the male
  # (upwards) and female (downwards) lines; one vectorised layer replaces
  # four zero-curvature geom_curve() layers
  annotate(
    "segment",
    x = c(1969, 1969, 2017, 2017),
    xend = c(1969, 1969, 2017, 2017),
    y = c(85, 83.5, 82.5, 81),
    yend = c(91.5, 76, 87, 76),
    color = "black", size = 0.5,
    arrow = arrow(length = unit(0.01, "npc"), type = "closed")
  ) +
  annotate(
    geom = "text", x = 1975, y = 84,
    label = "Difference of 17.1% in 1968", size = 3, color = "black"
  ) +
  annotate(
    geom = "text", x = 2017, y = 82,
    label = "Difference of 12.5% in 2016", size = 3, color = "black"
  ) +
  # Extra room on the right of the x axis for the direct line labels
  scale_x_continuous(
    breaks = seq(1968, 2016, 4),
    expand = expansion(add = c(0, 11))
  ) +
  # A plot can hold only one y scale: a second scale_y_continuous() call
  # replaces the first, so breaks and labels must be set together
  scale_y_continuous(
    breaks = seq(0, 100, 2),
    labels = scales::percent_format(scale = 1, accuracy = 1)
  ) +
  scale_color_manual(
    values = c(Female = "#D95F02", Male = "#1B9E77")
  ) +
  theme_half_open(font_size = 11) +
  theme(legend.position = "none") +
  labs(
    x = "Year",
    y = "Percent Employed Full-time by Gender ",
    title = "Trends of Genders Working Full Time"
  )

# calculate the difference in 1968
# male = 92.2 female = 75.1 difference = 17.1

# calculate the difference in 2016
# male = 87.6 female = 75.1, difference = 12.5



# Split the earnings series so the all-ages total can be emphasised:
# the individual age groups are drawn faded, the total fully opaque.
agegroupearnings <- earnings_female %>%
  filter(group != "Total, 16 years and older")
totalearnings <- earnings_female %>%
  filter(group == "Total, 16 years and older")

# Female salary as a percentage of male salary over time, one line per group
ggplot() +
  geom_line(
    data = agegroupearnings,
    aes(x = Year, y = percent, color = group),
    alpha = 0.45, size = .5
  ) +
  # alpha must lie in [0, 1]; 1 draws the total line fully opaque
  geom_line(
    data = totalearnings,
    aes(x = Year, y = percent, color = group),
    alpha = 1, size = .7
  ) +
  # Label every line directly at its final year instead of using a legend
  geom_text_repel(
    data = earnings_female %>%
      filter(Year == max(Year)),
    aes(x = Year, y = percent, label = group, color = group),
    hjust = 0, nudge_x = 1, direction = "y", size = 4.3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct line labels
  scale_x_continuous(
    breaks = seq(1980, 2011, 5),
    expand = expansion(add = c(0, 15.5))
  ) +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  scale_color_brewer(palette = "Dark2") +
  theme_half_open(font_size = 11) +
  theme(legend.position = "none") +
  labs(
    x = "Year",
    y = "Female salary percent of male salary",
    title = "Age Trends of Female Salary as a Percentage of Male Salary"
  )


# ---- Dumbbell charts: wage gap for the 10 highest paid jobs, 2013-2016 ----
# The four per-year charts share one data pipeline and one plotting function
# instead of four copy-pasted chunks.

# Occupation list used to filter and order the 2013-2015 charts and the
# wage-gap line chart. rev() makes the first listed occupation the last
# factor level, which ggplot2 draws at the top of a discrete y axis.
# NOTE(review): presumably this matches the ten occupations selected for
# 2016 below, in the same order — verify against jobsDumbellData2016.
top10Jobs2016 <- rev(c("Physicians and surgeons",
                   "Nurse anesthetists",
                   "Dentists",
                   "Petroleum engineers",
                   "Chief executives",
                   "Podiatrists",
                   "Lawyers",
                   "Mathematicians",
                   "Architectural and engineering managers",
                   "Optometrists"))

# Male and female median earnings per occupation for one year (wide format),
# plus `wage_dif`, the male-minus-female difference.
wage_gap_by_occupation <- function(jobs, target_year) {
  jobs %>%
    filter(year == target_year) %>%
    mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
    select(occupation, total_earnings_male, total_earnings_female, wage_dif)
}

# Reshape the wide earnings table to one row per occupation and gender,
# with that gender's earnings in `income` and a "Male"/"Female" `gender`.
earnings_to_long <- function(wide_earnings) {
  wide_earnings %>%
    pivot_longer(
      cols = c(total_earnings_male, total_earnings_female),
      names_to = "gender_earnings",
      values_to = "income"
    ) %>%
    mutate(
      gender = if_else(
        gender_earnings == "total_earnings_male", "Male", "Female"
      )
    )
}

# Long-format dumbbell data for one year, restricted to `jobs_shown` and
# with the occupation factor ordered as in `jobs_shown`.
dumbbell_data_for <- function(jobs, target_year, jobs_shown) {
  wage_gap_by_occupation(jobs, target_year) %>%
    filter(occupation %in% jobs_shown) %>%
    earnings_to_long() %>%
    mutate(
      occupation = fct_relevel(occupation, jobs_shown),
      gender = as.factor(gender)
    )
}

# Dumbbell chart: one grey bar per occupation joining the female and male
# median earnings, with the wage difference printed in a grey side panel.
# `header_job` is the occupation above which the column captions are drawn.
plot_wage_dumbbell <- function(dumbbell_data, plot_year,
                               header_job = "Physicians and surgeons") {
  header_rows <- filter(dumbbell_data, occupation == header_job)

  ggplot(dumbbell_data, aes(x = income, y = occupation)) +
    geom_line(aes(group = occupation), color = "grey", size = 1) +
    geom_point(aes(color = gender), size = 2.5, show.legend = FALSE) +
    # Side panel drawn once (the data has two rows per occupation, so a
    # data-driven geom_rect() would stack one rectangle per row)
    annotate(
      "rect",
      xmin = 250000, xmax = 300000, ymin = -Inf, ymax = Inf, fill = "grey"
    ) +
    # One wage-difference label per occupation, not one per gender row
    geom_text(
      data = distinct(dumbbell_data, occupation, wage_dif),
      aes(x = 275000, label = scales::dollar(wage_dif)),
      color = "black", fontface = "bold", size = 3
    ) +
    # Column header for the side panel, placed above the header row
    geom_text(
      data = distinct(header_rows, occupation),
      aes(x = 275000, label = "Difference"),
      color = "black", size = 3, vjust = -2, fontface = "bold"
    ) +
    # "Male" / "Female" captions above the header dumbbell, positioned and
    # coloured from the data rather than hard-coded per-year incomes
    geom_text(
      data = header_rows,
      aes(label = gender, color = gender),
      size = 3, vjust = -1.5, fontface = "bold", show.legend = FALSE
    ) +
    scale_x_continuous(
      expand = expansion(mult = c(.015, 0.05)),
      limits = c(50000, 300000),
      labels = scales::dollar,
      breaks = seq(50000, 250000, 50000)
    ) +
    scale_color_manual(
      values = c(Female = "#D95F02", Male = "#1B9E77")
    ) +
    # Extra vertical padding so the captions above the top row fit
    scale_y_discrete(expand = c(0.12, 0)) +
    theme_minimal_vgrid(font_size = 11) +
    theme(
      axis.line.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(
      x = "Estimated Median Earnings (US$)",
      y = "Occupation",
      color = "Gender",
      title = "Wage Gap for the 10 Highest Paid Jobs",
      subtitle = paste("From year", plot_year)
    )
}

# 2016: the ten occupations with the highest male median earnings,
# ordered by male income
jobsDumbellData2016 <- wage_gap_by_occupation(jobs_gender, 2016) %>%
  arrange(desc(total_earnings_male)) %>%
  slice(1:10) %>%
  earnings_to_long() %>%
  mutate(
    occupation = fct_reorder2(occupation, gender, desc(income)),
    gender = as.factor(gender)
  )

jobsDumbell2016 <- plot_wage_dumbbell(jobsDumbellData2016, 2016)

plot(jobsDumbell2016)

# 2013-2015: same occupations, in the order given by top10Jobs2016
jobsDumbellData2015 <- dumbbell_data_for(jobs_gender, 2015, top10Jobs2016)
jobsDumbell2015 <- plot_wage_dumbbell(jobsDumbellData2015, 2015)

plot(jobsDumbell2015)

jobsDumbellData2014 <- dumbbell_data_for(jobs_gender, 2014, top10Jobs2016)
jobsDumbell2014 <- plot_wage_dumbbell(jobsDumbellData2014, 2014)

plot(jobsDumbell2014)

jobsDumbellData2013 <- dumbbell_data_for(jobs_gender, 2013, top10Jobs2016)
jobsDumbell2013 <- plot_wage_dumbbell(jobsDumbellData2013, 2013)

plot(jobsDumbell2013)
# Male-minus-female median earnings per year for the occupations listed in
# top10Jobs2016 (all years present in jobs_gender are kept).
occupationsGapData <- jobs_gender %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(year, occupation, wage_dif) %>%
  filter(occupation %in% top10Jobs2016)

# One line per occupation showing how its wage gap changes over the years
ggplot() +
  geom_line(
    data = occupationsGapData,
    aes(x = year, y = wage_dif, color = occupation),
    alpha = 0.75, size = 0.75
  ) +
  # Label every line directly at its final year instead of using a legend
  geom_text_repel(
    data = occupationsGapData %>%
      filter(year == max(year)),
    aes(x = year, y = wage_dif, label = occupation, color = occupation),
    hjust = 0, nudge_x = 1, direction = "y", size = 3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct line labels
  scale_x_continuous(
    breaks = seq(2013, 2016, 1),
    expand = expansion(add = c(0, .85))
  ) +
  scale_y_continuous(labels = scales::dollar_format(scale = 1)) +
  # The layers map `color` (not `fill`) to a discrete variable, so the
  # viridis palette has to be applied with the discrete colour scale;
  # scale_fill_viridis() had no effect on this plot
  scale_color_viridis(discrete = TRUE) +
  theme_half_open(font_size = 11) +
  theme(legend.position = "none") +
  labs(
    x = "Year",
    y = "Wage Difference from Male to Female",
    title = "Wage Gap Change Over The Years For The Top 10 Highest Paid Jobs",
    caption = "A negative value corresponds to the female salary being greater than the male salary."
  )