# Download the three TidyTuesday (2019-03-05) CSV files used in this report.
# Each call reads straight from GitHub, so a network connection is required.
employed_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/employed_gender.csv"
)
earnings_female <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/earnings_female.csv"
)
jobs_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
The data we use to explore the question “How has the US gender wage gap changed over time for different occupations and age groups?” come from the Tidy Tuesday weekly project of March 5, 2019, “Women in the Workforce”: https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-03-05
The original source of the specific jobs provided are derived from the U.S. Census Bureau: “Full-Time, Year-Round Workers and Median Earnings: 2000 and 2013-2019”. The source is: https://www.census.gov/data/tables/time-series/demo/industry-occupation/median-earnings.html.
The historical data, by contrast, are extracted from the U.S. Bureau of Labor Statistics. The first historical data set is Women’s earnings 1979-2011: https://www.bls.gov/opub/ted/2012/ted_20121123.htm. This data set shows the earnings gap between women and men for most age groups. The second historical data set, also extracted from the U.S. Bureau of Labor Statistics, is about the percentage of employed women working full time since 1968: https://www.bls.gov/opub/ted/2017/percentage-of-employed-women-working-full-time-little-changed-over-past-5-decades.htm.
While the validity of the original source is reflected through the extraction of a government website, Tidy Tuesday did indeed re-process and clean up the data. It is important to note that some variables in this data set are only as recent as in 2011, whereas other variables have data as recent as 2019. There is always a potential for data to be biased depending on the incentive to collect it. While we do not presume this data would be biased based on the credibility of the US Census Bureau, it is always important to be cognizant of the possibility and explore all avenues of bias.
The data dictionaries for `employed_gender`, `earnings_female`, and `jobs_gender` are in the Appendix.
#full time compared to part time
employedfulltime_gender <- employed_gender %>%
select(year,full_time_female, full_time_male) %>%
rename(Female = full_time_female, Male = full_time_male) %>%
gather(key = "gender", value = "fullTime", Female:Male)
employedfulltime_gender %>%
ggplot(aes(x=year, y = fullTime, color = gender)) +
geom_line(size = .5) +
geom_text_repel(data = employedfulltime_gender %>%
filter(year == max(year)), aes(label = gender), hjust=0, nudge_x = 1, direction = "y", size = 4.5, segment.color = NA) +
geom_point(size =.5) +
scale_x_continuous(breaks = seq(1968,2016, 4),
expand = expansion(add = c(0,11))) +
scale_y_continuous(breaks = seq(0,100,2)) +
scale_color_manual(
values = c('#D95F02', '#1B9E77')) +
theme_half_open(font_size = 11) +
theme(legend.position = 'none') +
geom_curve(
data = data.frame(
x = 1969, xend = 1969, y = 85, yend = 91.5),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = 'black', size = 0.5, curvature = 0,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed")) +
geom_smooth(se = FALSE, linetype = 'dashed', method = "lm")+
geom_curve(
data = data.frame(
x = 1969, xend = 1969, y = 83.5, yend = 76),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = 'black', size = 0.5, curvature = 0,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed")) +
annotate(geom = 'text', x = 1975, y = 84,
label = 'Difference of 17.1% in 1968', size = 3, color = 'black') +
annotate(geom = 'text', x = 2017, y = 82,
label = 'Difference of 12.5% in 2016', size = 3, color = 'black') +
geom_curve(
data = data.frame(
x = 2017, xend = 2017, y = 82.5, yend = 87),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = 'black', size = 0.5, curvature = 0,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed")) +
geom_curve(
data = data.frame(
x = 2017, xend = 2017, y = 81, yend = 76),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = 'black', size = 0.5, curvature = 0,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed"))+
scale_y_continuous(labels = scales::percent_format(scale =1, accuracy = 1)) +
labs(x = "Year", y = "Percent Employed Full-time by Gender ", title = "Trends of Genders Working Full Time")
# calculate the difference in 1968
# male = 92.2 female = 75.1 difference = 17.1
# calculate the difference in 2016
# male = 87.6 female = 75.1, difference = 12.5
The above chart, “Trends of Genders Working Full Time”, shows the gap between the percentages of employed men and employed women who work full time. While this gap is slightly smaller in the 2000s than it was in the 1960s–1980s, there is still a clearly apparent difference between men and women in 2016.
While the gap between the two genders has decreased, the decrease did not come from more women working full time, but rather from fewer men doing so. The percentage of women working full time in 1968 is the same as in 2016.
It is also clear that there was a large decline in the percentage of employed men and women working full time in 2008 because of the 2008 financial crisis.
After analyzing this chart, we decided to look into whether occupation or age group have a larger effect on the gap.
Not only is there an evident difference between the percentages of men and women working full time, there is also a gender difference in salary. The following chart, “Age Trends of Female Salary as a Percentage of Male Salary”, shows female salaries as a percentage of male salaries. This chart shows that since 1979 female salary as a percentage of male salary, across all ages, has increased over time. In 1979, female salary as a percentage of male salary was 62.3%, and in 2011 it was 80.9%. While this is an increase of 18.6 percentage points, women are still not paid the same as men. As of 2011, there is still a gap of 19.1 percentage points.
# Female earnings as a percentage of male earnings, by age group.
# The overall series is split from the age groups so it can be emphasised
# (opaque, slightly thicker) while the age groups are drawn semi-transparent.
agegroupearnings <- earnings_female %>%
  filter(group != "Total, 16 years and older")
totalearnings <- earnings_female %>%
  filter(group == "Total, 16 years and older")

ggplot(mapping = aes(x = Year, y = percent, color = group)) +
  geom_line(data = agegroupearnings, alpha = 0.45, size = .5) +
  # alpha must lie in [0, 1]; the previous value of 1.2 was out of range.
  geom_line(data = totalearnings, alpha = 1, size = .7) +
  # Label each series at its last year instead of using a legend.
  geom_text_repel(
    data = earnings_female %>% filter(Year == max(Year)),
    aes(label = group),
    hjust = 0, nudge_x = 1, direction = "y", size = 4.3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct labels.
  scale_x_continuous(
    breaks = seq(1980, 2011, 5),
    expand = expansion(add = c(0, 15.5))
  ) +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  scale_color_brewer(palette = "Dark2") +
  theme_half_open(font_size = 11) +
  theme(legend.position = 'none') +
  labs(
    x = "Year",
    y = "Female salary percent of male salary",
    title = "Age Trends of Female Salary as a Percentage of Male Salary"
  )
The lingering question is why. How does it make sense that women are still not paid the same as men?
A closer look at the figure above shows the individual age groups as the more transparent lines. It appears that the younger the age group, the smaller the salary gap. A likely explanation is that when people enter the workforce, they tend to earn the minimum wage or a lower wage than those who have been working for many years. Therefore, there is little room for women to earn less than men, since both enter at or near the legal minimum wage. As time goes on and employees are promoted, female salary as a percentage of male salary decreases, meaning that as age increases, so does the difference between the genders' salaries.
Perhaps, men are being promoted to higher positions and women are not or that when men are promoted to superior positions, they are simply paid more than females at the same position. Another viable option is that more men hold more senior positions in high paying jobs and more women hold lower paying jobs, which could potentially be representative of the stark difference in salaries.
To get a closer look at salary distribution, an analysis of the top 10 highest-paying jobs may help test the assumption that the wage gap could be due to the fact that men are paid more simply because they hold better-earning job titles. The following graphs show that even within the same occupation, there is still a gender wage gap.
# ---- Dumbbell charts: male vs female median earnings, 10 highest-paid jobs ----

# Occupations shown in the 2013-2015 charts and in the wage-gap line chart.
# The vector is reversed so the first name listed becomes the last factor
# level, which ggplot2 draws as the top row of a discrete y axis.
# NOTE(review): presumably this mirrors the 2016 top-10 selection computed
# below; confirm if the source data are ever refreshed.
top10Jobs2016 <- rev(c("Physicians and surgeons",
                       "Nurse anesthetists",
                       "Dentists",
                       "Petroleum engineers",
                       "Chief executives",
                       "Podiatrists",
                       "Lawyers",
                       "Mathematicians",
                       "Architectural and engineering managers",
                       "Optometrists"))

# Stack the male/female earnings columns into long form: one row per
# occupation and gender, earnings in `income`, and a "Female"/"Male" factor in
# `gender`. Other columns (e.g. `wage_dif`) are carried along unchanged.
stack_gender_earnings <- function(wide_earnings) {
  wide_earnings %>%
    pivot_longer(
      cols = c(total_earnings_male, total_earnings_female),
      names_to = "gender_earnings",
      values_to = "income"
    ) %>%
    mutate(
      gender = as.factor(
        if_else(gender_earnings == "total_earnings_male", "Male", "Female")
      )
    )
}

# Long-form dumbbell data for one year, restricted to `occupations`.
# `occupation` becomes a factor whose levels follow the order of `occupations`.
dumbbell_data_for_year <- function(jobs, target_year, occupations) {
  jobs %>%
    filter(year == target_year) %>%
    mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
    select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
    filter(occupation %in% occupations) %>%
    stack_gender_earnings() %>%
    mutate(occupation = fct_relevel(occupation, occupations))
}

# Build one dumbbell chart.
#   chart_data     long-form data with columns occupation (factor), income,
#                  gender and wage_dif
#   year_label     year shown in the subtitle
#   male_label_x,
#   female_label_x x positions of the "Male"/"Female" captions drawn above the
#                  `header_job` row
#   header_job     occupation whose row carries the captions and the
#                  "Difference" column header
# Returns the ggplot object; nothing is printed.
wage_gap_dumbbell <- function(chart_data, year_label,
                              male_label_x, female_label_x,
                              header_job = "Physicians and surgeons") {
  # One row per occupation, so each text label is drawn once rather than once
  # per gender row (which overplotted every label).
  gap_labels <- distinct(chart_data, occupation, wage_dif)
  header_row <- filter(gap_labels, occupation == header_job)

  ggplot(chart_data, aes(x = income, y = occupation)) +
    geom_line(aes(group = occupation), color = 'grey', size = 1) +
    geom_point(aes(color = gender), size = 2.5, show.legend = FALSE) +
    theme_minimal_vgrid(font_size = 11) +
    theme(
      axis.line.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(
      x = 'Estimated Median Earnings (US$)',
      y = 'Occupation',
      color = 'Gender',
      title = 'Wage Gap for the 10 Highest Paid Jobs',
      subtitle = paste("From year", year_label)
    ) +
    # Grey band on the right that holds the per-occupation wage difference.
    annotate("rect", xmin = 250000, xmax = 300000, ymin = -Inf, ymax = Inf,
             fill = "grey") +
    geom_text(data = gap_labels,
              aes(x = 275000, label = scales::dollar(wage_dif)),
              color = "black", fontface = "bold", size = 3) +
    geom_text(data = header_row,
              aes(x = 275000, label = "Difference"),
              color = "black", size = 3, vjust = -2, fontface = "bold") +
    scale_x_continuous(
      expand = expansion(mult = c(.015, 0.05)),
      limits = c(50000, 300000),
      labels = scales::dollar,
      breaks = seq(50000, 250000, 50000)
    ) +
    scale_color_manual(values = c('#D95F02', '#1B9E77')) +
    # Extra vertical padding so the captions above the top row are not clipped.
    scale_y_discrete(expand = c(0.12, 0)) +
    geom_text(data = header_row,
              aes(x = male_label_x, label = "Male"),
              color = "#1B9E77", size = 3, vjust = -1.5, fontface = "bold") +
    geom_text(data = header_row,
              aes(x = female_label_x, label = "Female"),
              color = "#D95F02", size = 3, vjust = -1.5, fontface = "bold")
}

# 2016: pick the ten occupations with the highest male median earnings and
# order the y axis by ascending male earnings (highest-paid job on top).
jobsDumbellData2016 <- jobs_gender %>%
  filter(year == 2016) %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
  arrange(desc(total_earnings_male)) %>%
  slice(1:10) %>%
  mutate(occupation = fct_reorder(occupation, total_earnings_male)) %>%
  stack_gender_earnings()
jobsDumbell2016 <- wage_gap_dumbbell(
  jobsDumbellData2016, 2016,
  male_label_x = 231420, female_label_x = 166388
)
plot(jobsDumbell2016)

# 2015-2013: same occupations and row order as the 2016 chart.
jobsDumbellData2015 <- dumbbell_data_for_year(jobs_gender, 2015, top10Jobs2016)
jobsDumbell2015 <- wage_gap_dumbbell(
  jobsDumbellData2015, 2015,
  male_label_x = 221528, female_label_x = 150975
)
plot(jobsDumbell2015)

jobsDumbellData2014 <- dumbbell_data_for_year(jobs_gender, 2014, top10Jobs2016)
jobsDumbell2014 <- wage_gap_dumbbell(
  jobsDumbellData2014, 2014,
  male_label_x = 211526, female_label_x = 150053
)
plot(jobsDumbell2014)

jobsDumbellData2013 <- dumbbell_data_for_year(jobs_gender, 2013, top10Jobs2016)
jobsDumbell2013 <- wage_gap_dumbbell(
  jobsDumbellData2013, 2013,
  male_label_x = 202533, female_label_x = 140036
)
plot(jobsDumbell2013)
After plotting the graph for 2016, we can see that all of the top 10 jobs, except for “Architectural and engineering managers” (which has a wage gap of 45 US dollars), show a considerable gap between wages. The top-paid job, “Physicians and surgeons”, also has the largest wage gap, at over US$65,000.
By clicking on the tabs to see the plots for all four years, we can see that there have been variations in income by occupation over the last four years; however, on average, male workers in all of these occupations have consistently had a higher income than female workers. The only instance of women having a higher income occurs in 2015 for “Architectural and engineering managers”.
To have a clearer view of how the wage gap changed over time for these 10 occupations, we decided to plot the following line chart, “Wage Gap Change Over The Years For The Top 10 Highest Paid Jobs:”
# Male-minus-female median earnings per year for the occupations listed in
# top10Jobs2016 (defined with the dumbbell charts).
occupationsGapData <- jobs_gender %>%
  filter(occupation %in% top10Jobs2016) %>%
  transmute(
    year,
    occupation,
    wage_dif = total_earnings_male - total_earnings_female
  )

ggplot(occupationsGapData, aes(x = year, y = wage_dif, color = occupation)) +
  geom_line(alpha = 0.75, size = 0.75) +
  # Label each line at its last year instead of using a legend.
  geom_text_repel(
    data = occupationsGapData %>% filter(year == max(year)),
    aes(label = occupation),
    hjust = 0, nudge_x = 1, direction = "y", size = 3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct labels.
  scale_x_continuous(
    breaks = seq(2013, 2016, 1),
    expand = expansion(add = c(0, .85))
  ) +
  scale_y_continuous(labels = scales::dollar_format(scale = 1)) +
  # The layers map `color`, not `fill`, so the previous scale_fill_viridis()
  # had no effect. `occupation` is categorical, hence discrete = TRUE.
  scale_color_viridis(discrete = TRUE) +
  theme_half_open(font_size = 11) +
  theme(legend.position = 'none') +
  labs(
    x = "Year",
    y = "Wage Difference from Male to Female",
    title = "Wage Gap Change Over The Years For The Top 10 Highest Paid Jobs",
    caption = "A negative value corresponds to the female salary being greater than the male salary."
  )
This plot shows that there is no clear increase or decrease in the gap overall. However, we can see that the gap has substantially widened for Mathematicians, demonstrating this may be getting worse.
We can also see that most of the wage gaps lie from around 25,000 US Dollars to 65,000 US Dollars, which shows that there is still a long way to go in order to achieve wage equality.
In conclusion, the US wage gap has been a problem for many years. While there have been conversations suggesting that the salaries of men and women are no longer different, it is evident that in higher-paid positions as well as in older age groups, there is a clear gap. Unfortunately, this gap does not appear to be consistently improving over time as society may have hoped. The gap is widest in the oldest age groups, and it is still prevalent even within the highest-paid occupations. It does not appear from the charts that the efforts to combat the problem have resulted in significant improvement. If this data reflects reality, what have the efforts to close this gap actually done?
Below are the data dictionaries for `employed_gender`, `earnings_female`, and `jobs_gender`:
Data dictionary for `jobs_gender`:

variable | class | description |
---|---|---|
year | integer | Year |
occupation | character | Specific job/career |
major_category | character | Broad category of occupation |
minor_category | character | Fine category of occupation |
total_workers | double | Total estimated full-time workers > 16 years old |
workers_male | double | Estimated MALE full-time workers > 16 years old |
workers_female | double | Estimated FEMALE full-time workers > 16 years old |
percent_female | double | The percent of females for specific occupation |
total_earnings | double | Total estimated median earnings for full-time workers > 16 years old |
total_earnings_male | double | Estimated MALE median earnings for full-time workers > 16 years old |
total_earnings_female | double | Estimated FEMALE median earnings for full-time workers > 16 years old |
wage_percent_of_male | double | Female wages as percent of male wages - NA for occupations with small sample size |

Data dictionary for `earnings_female`:

variable | class | description |
---|---|---|
Year | integer | Year |
group | character | Age group |
percent | double | Female salary percent of male salary |

Data dictionary for `employed_gender`:

variable | class | description |
---|---|---|
year | double | Year |
total_full_time | double | Percent of total employed people usually working full time |
total_part_time | double | Percent of total employed people usually working part time |
full_time_female | double | Percent of employed women usually working full time |
part_time_female | double | Percent of employed women usually working part time |
full_time_male | double | Percent of employed men usually working full time |
part_time_male | double | Percent of employed men usually working part time |
# Setup: attach packages and set knitr / ggplot2 defaults for the whole report.
# NOTE(review): attach order decides which package's functions mask which, so
# the order of the library() calls below is left untouched.
library(tidyverse)
library(here)
# Chunk defaults applied to every code chunk that is knitted after this call.
knitr::opts_chunk$set(
warning = FALSE,
message = FALSE,
comment = "#>",
fig.path = "figs/", # Folder where rendered plots are saved
fig.width = 7.252, # Default plot width
fig.height = 4, # Default plot height
fig.retina = 3 # For better plot resolution
)
# Additional packages used by the analysis (no data is loaded in this section).
# NOTE(review): tidyverse, here and cowplot are attached twice in this file;
# the repeated library() calls are redundant but harmless.
library(tidyverse)
library(here)
library(knitr)
library(readxl)
library("GGally")
library(cowplot)
library(ggplot2)
library(ggrepel)
library(cowplot)
library(viridis)
library(lubridate)
library(plotly)
# Put any other "global" settings here, e.g. a ggplot theme:
# Default theme for plots that do not add a theme of their own; the plots in
# this file each add theme_half_open() or theme_minimal_vgrid().
theme_set(theme_bw(base_size = 20))
# Download the three TidyTuesday (2019-03-05) CSV files used in this report.
# Each call reads straight from GitHub, so a network connection is required.
employed_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/employed_gender.csv"
)
earnings_female <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/earnings_female.csv"
)
jobs_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
# Full-time employment share by gender over time.
# Reshape to long form: one row per year and gender, with that gender's
# full-time percentage in `fullTime`.
employedfulltime_gender <- employed_gender %>%
  select(year, Female = full_time_female, Male = full_time_male) %>%
  pivot_longer(
    cols = c(Female, Male),
    names_to = "gender",
    values_to = "fullTime"
  )

# Vertical arrows bracketing the male/female gap near both ends of the series.
# Each row is one arrow drawn from (x, y) to (x, yend).
gap_arrows <- data.frame(
  x    = c(1969, 1969, 2017, 2017),
  y    = c(85, 83.5, 82.5, 81),
  yend = c(91.5, 76, 87, 76)
)

employedfulltime_gender %>%
  ggplot(aes(x = year, y = fullTime, color = gender)) +
  geom_line(size = .5) +
  # Label each series at its last year instead of using a legend.
  geom_text_repel(
    data = employedfulltime_gender %>% filter(year == max(year)),
    aes(label = gender),
    hjust = 0, nudge_x = 1, direction = "y", size = 4.5, segment.color = NA
  ) +
  geom_point(size = .5) +
  # Extra room on the right of the x axis for the direct labels.
  scale_x_continuous(
    breaks = seq(1968, 2016, 4),
    expand = expansion(add = c(0, 11))
  ) +
  # One y scale carrying both the breaks and the percent labels. Adding
  # scale_y_continuous() twice makes ggplot2 keep only the last one, which
  # silently discarded the breaks.
  scale_y_continuous(
    breaks = seq(0, 100, 2),
    labels = scales::percent_format(scale = 1, accuracy = 1)
  ) +
  scale_color_manual(values = c('#D95F02', '#1B9E77')) +
  theme_half_open(font_size = 11) +
  theme(legend.position = 'none') +
  # Dashed linear trend per gender.
  geom_smooth(se = FALSE, linetype = 'dashed', method = "lm") +
  # inherit.aes = FALSE: the arrow data has none of the plot-level columns.
  geom_segment(
    data = gap_arrows,
    mapping = aes(x = x, xend = x, y = y, yend = yend),
    inherit.aes = FALSE,
    color = 'black', size = 0.5,
    arrow = arrow(length = unit(0.01, "npc"), type = "closed")
  ) +
  # Gap in 1968: male = 92.2, female = 75.1, difference = 17.1
  annotate(geom = 'text', x = 1975, y = 84,
           label = 'Difference of 17.1% in 1968', size = 3, color = 'black') +
  # Gap in 2016: male = 87.6, female = 75.1, difference = 12.5
  annotate(geom = 'text', x = 2017, y = 82,
           label = 'Difference of 12.5% in 2016', size = 3, color = 'black') +
  labs(
    x = "Year",
    y = "Percent Employed Full-time by Gender ",
    title = "Trends of Genders Working Full Time"
  )
# Female earnings as a percentage of male earnings, by age group.
# The overall series is split from the age groups so it can be emphasised
# (opaque, slightly thicker) while the age groups are drawn semi-transparent.
agegroupearnings <- earnings_female %>%
  filter(group != "Total, 16 years and older")
totalearnings <- earnings_female %>%
  filter(group == "Total, 16 years and older")

ggplot(mapping = aes(x = Year, y = percent, color = group)) +
  geom_line(data = agegroupearnings, alpha = 0.45, size = .5) +
  # alpha must lie in [0, 1]; the previous value of 1.2 was out of range.
  geom_line(data = totalearnings, alpha = 1, size = .7) +
  # Label each series at its last year instead of using a legend.
  geom_text_repel(
    data = earnings_female %>% filter(Year == max(Year)),
    aes(label = group),
    hjust = 0, nudge_x = 1, direction = "y", size = 4.3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct labels.
  scale_x_continuous(
    breaks = seq(1980, 2011, 5),
    expand = expansion(add = c(0, 15.5))
  ) +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  scale_color_brewer(palette = "Dark2") +
  theme_half_open(font_size = 11) +
  theme(legend.position = 'none') +
  labs(
    x = "Year",
    y = "Female salary percent of male salary",
    title = "Age Trends of Female Salary as a Percentage of Male Salary"
  )
# ---- Dumbbell charts: male vs female median earnings, 10 highest-paid jobs ----

# Occupations shown in the 2013-2015 charts and in the wage-gap line chart.
# The vector is reversed so the first name listed becomes the last factor
# level, which ggplot2 draws as the top row of a discrete y axis.
# NOTE(review): presumably this mirrors the 2016 top-10 selection computed
# below; confirm if the source data are ever refreshed.
top10Jobs2016 <- rev(c("Physicians and surgeons",
                       "Nurse anesthetists",
                       "Dentists",
                       "Petroleum engineers",
                       "Chief executives",
                       "Podiatrists",
                       "Lawyers",
                       "Mathematicians",
                       "Architectural and engineering managers",
                       "Optometrists"))

# Stack the male/female earnings columns into long form: one row per
# occupation and gender, earnings in `income`, and a "Female"/"Male" factor in
# `gender`. Other columns (e.g. `wage_dif`) are carried along unchanged.
stack_gender_earnings <- function(wide_earnings) {
  wide_earnings %>%
    pivot_longer(
      cols = c(total_earnings_male, total_earnings_female),
      names_to = "gender_earnings",
      values_to = "income"
    ) %>%
    mutate(
      gender = as.factor(
        if_else(gender_earnings == "total_earnings_male", "Male", "Female")
      )
    )
}

# Long-form dumbbell data for one year, restricted to `occupations`.
# `occupation` becomes a factor whose levels follow the order of `occupations`.
dumbbell_data_for_year <- function(jobs, target_year, occupations) {
  jobs %>%
    filter(year == target_year) %>%
    mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
    select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
    filter(occupation %in% occupations) %>%
    stack_gender_earnings() %>%
    mutate(occupation = fct_relevel(occupation, occupations))
}

# Build one dumbbell chart.
#   chart_data     long-form data with columns occupation (factor), income,
#                  gender and wage_dif
#   year_label     year shown in the subtitle
#   male_label_x,
#   female_label_x x positions of the "Male"/"Female" captions drawn above the
#                  `header_job` row
#   header_job     occupation whose row carries the captions and the
#                  "Difference" column header
# Returns the ggplot object; nothing is printed.
wage_gap_dumbbell <- function(chart_data, year_label,
                              male_label_x, female_label_x,
                              header_job = "Physicians and surgeons") {
  # One row per occupation, so each text label is drawn once rather than once
  # per gender row (which overplotted every label).
  gap_labels <- distinct(chart_data, occupation, wage_dif)
  header_row <- filter(gap_labels, occupation == header_job)

  ggplot(chart_data, aes(x = income, y = occupation)) +
    geom_line(aes(group = occupation), color = 'grey', size = 1) +
    geom_point(aes(color = gender), size = 2.5, show.legend = FALSE) +
    theme_minimal_vgrid(font_size = 11) +
    theme(
      axis.line.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(
      x = 'Estimated Median Earnings (US$)',
      y = 'Occupation',
      color = 'Gender',
      title = 'Wage Gap for the 10 Highest Paid Jobs',
      subtitle = paste("From year", year_label)
    ) +
    # Grey band on the right that holds the per-occupation wage difference.
    annotate("rect", xmin = 250000, xmax = 300000, ymin = -Inf, ymax = Inf,
             fill = "grey") +
    geom_text(data = gap_labels,
              aes(x = 275000, label = scales::dollar(wage_dif)),
              color = "black", fontface = "bold", size = 3) +
    geom_text(data = header_row,
              aes(x = 275000, label = "Difference"),
              color = "black", size = 3, vjust = -2, fontface = "bold") +
    scale_x_continuous(
      expand = expansion(mult = c(.015, 0.05)),
      limits = c(50000, 300000),
      labels = scales::dollar,
      breaks = seq(50000, 250000, 50000)
    ) +
    scale_color_manual(values = c('#D95F02', '#1B9E77')) +
    # Extra vertical padding so the captions above the top row are not clipped.
    scale_y_discrete(expand = c(0.12, 0)) +
    geom_text(data = header_row,
              aes(x = male_label_x, label = "Male"),
              color = "#1B9E77", size = 3, vjust = -1.5, fontface = "bold") +
    geom_text(data = header_row,
              aes(x = female_label_x, label = "Female"),
              color = "#D95F02", size = 3, vjust = -1.5, fontface = "bold")
}

# 2016: pick the ten occupations with the highest male median earnings and
# order the y axis by ascending male earnings (highest-paid job on top).
jobsDumbellData2016 <- jobs_gender %>%
  filter(year == 2016) %>%
  mutate(wage_dif = total_earnings_male - total_earnings_female) %>%
  select(occupation, total_earnings_male, total_earnings_female, wage_dif) %>%
  arrange(desc(total_earnings_male)) %>%
  slice(1:10) %>%
  mutate(occupation = fct_reorder(occupation, total_earnings_male)) %>%
  stack_gender_earnings()
jobsDumbell2016 <- wage_gap_dumbbell(
  jobsDumbellData2016, 2016,
  male_label_x = 231420, female_label_x = 166388
)
plot(jobsDumbell2016)

# 2015-2013: same occupations and row order as the 2016 chart.
jobsDumbellData2015 <- dumbbell_data_for_year(jobs_gender, 2015, top10Jobs2016)
jobsDumbell2015 <- wage_gap_dumbbell(
  jobsDumbellData2015, 2015,
  male_label_x = 221528, female_label_x = 150975
)
plot(jobsDumbell2015)

jobsDumbellData2014 <- dumbbell_data_for_year(jobs_gender, 2014, top10Jobs2016)
jobsDumbell2014 <- wage_gap_dumbbell(
  jobsDumbellData2014, 2014,
  male_label_x = 211526, female_label_x = 150053
)
plot(jobsDumbell2014)

jobsDumbellData2013 <- dumbbell_data_for_year(jobs_gender, 2013, top10Jobs2016)
jobsDumbell2013 <- wage_gap_dumbbell(
  jobsDumbellData2013, 2013,
  male_label_x = 202533, female_label_x = 140036
)
plot(jobsDumbell2013)
# Male-minus-female median earnings per year for the occupations listed in
# top10Jobs2016 (defined with the dumbbell charts).
occupationsGapData <- jobs_gender %>%
  filter(occupation %in% top10Jobs2016) %>%
  transmute(
    year,
    occupation,
    wage_dif = total_earnings_male - total_earnings_female
  )

ggplot(occupationsGapData, aes(x = year, y = wage_dif, color = occupation)) +
  geom_line(alpha = 0.75, size = 0.75) +
  # Label each line at its last year instead of using a legend.
  geom_text_repel(
    data = occupationsGapData %>% filter(year == max(year)),
    aes(label = occupation),
    hjust = 0, nudge_x = 1, direction = "y", size = 3, segment.color = NA
  ) +
  # Extra room on the right of the x axis for the direct labels.
  scale_x_continuous(
    breaks = seq(2013, 2016, 1),
    expand = expansion(add = c(0, .85))
  ) +
  scale_y_continuous(labels = scales::dollar_format(scale = 1)) +
  # The layers map `color`, not `fill`, so the previous scale_fill_viridis()
  # had no effect. `occupation` is categorical, hence discrete = TRUE.
  scale_color_viridis(discrete = TRUE) +
  theme_half_open(font_size = 11) +
  theme(legend.position = 'none') +
  labs(
    x = "Year",
    y = "Wage Difference from Male to Female",
    title = "Wage Gap Change Over The Years For The Top 10 Highest Paid Jobs",
    caption = "A negative value corresponds to the female salary being greater than the male salary."
  )