knitr::opts_chunk$set(
comment = "#>",
fig.align = "center",
fig.show = "hold",
fig.width = 7,
fig.asp = 0.618,
fig.retina = 3,
fig.path = "figs_auto/",
out.width = "90%"
)
library(tidyverse)
library(here)
library(janitor)
library(lubridate)
library(rvest)
library(gridExtra)
library(kableExtra)
library(cowplot)
library(ggrepel)
library(gganimate)
library(plotly)
library(reactable)
us_elec_gen <- tibble(
Type = c("Renewables", "Natural Gas", "Nuclear", "Coal"),
Percentage = c(0.23, 0.39, 0.18, 0.19)
)
us_elec_bar <- us_elec_gen %>%
ggplot(aes(x = reorder(Type, -Percentage),
y = Percentage,
fill = Type)) +
geom_col(width = 0.5) +
scale_fill_manual(values = c("Renewables" = "#719847",
"Natural Gas" = "gray",
"Nuclear" = "gray",
"Coal" = "dimgray")) +
geom_text(aes(label = scales::percent(Percentage)),
vjust = -0.3,
size = 5,
family = "Ubuntu") +
scale_y_continuous(expand = expansion(mult = c(0, 0.15)),
labels = scales::percent_format()) +
theme_minimal_hgrid(font_family = "Ubuntu") +
theme(legend.position = "none",
axis.text.x = element_text(size = 11)) +
labs(x = "Energy Type",
y = "Electricity Share",
title = "Renewables outnumber coal for electricity\ngeneration in 2022",
caption = "Data from BCSE")
us_elec_bar
# 1. Load and clean data of international solar and wind gen
drop <- c('World', 'Asia', 'Europe', 'North America',
'European Union', 'South America', 'Eurasia', 'Africa')
internationalPath <- here('data', 'irena_solar_wind_international_raw.csv')
international <- read_csv(internationalPath, skip = 2) %>%
rename(Country = 'Region/country/area') %>%
pivot_longer(
names_to = 'Year',
values_to = 'Generation',
cols = '2000':'2021') %>%
mutate(
Year = as.numeric(Year)
) %>%
filter(!Country %in% drop)
international %>%
write_csv(here('data', 'irena_solar_wind_international_long.csv'))
# 2. International Solar Generation
# 2.1 Solar gen data frame
solar_international <- international %>%
filter(Technology == 'Solar')
# 2.2 Top 5 solar producers in 2021
top_5_solar <- solar_international %>%
filter(Year == '2021') %>%
arrange(desc(Generation)) %>%
slice(1:5)
# 2.3 Plot of top 5 solar producers
solar_international_plot <-
ggplot(solar_international %>%
filter(Country %in% c(top_5_solar$Country)),
aes(x = Year, y = Generation/1000, group = Country, color = Country)) +
geom_line(linewidth = 1) +
geom_text_repel(
data = top_5_solar,
aes(label = Country),
hjust = 0, nudge_x = 1, direction = "y",
size = 5, segment.color = NA,
family = "Ubuntu") +
scale_x_continuous(
breaks = seq(2000, 2021, 3),
expand = expansion(add = c(1, 5))) +
scale_y_continuous(expand = expand_scale(mult = c(0, 0.05))) +
labs(x = 'Year',
y = 'Generation (TWh)',
title = 'United States is the second highest producer of\nsolar energy',
subtitle = 'Top 5 producers of solar energy in 2021',
caption = 'Data from IRENA') +
scale_color_manual(values = c('#719847','#ccca5d','#b2529d','#4c67a5','#a5734c')) +
theme_minimal_hgrid(font_family = 'Ubuntu') +
theme(legend.position = 'none')
ggsave(
filename = here('figs_manu', '3_1_solar_inter_plot.png'),
plot = solar_international_plot,
width = 8,
height = 8 / 1.618
)
# 3. International Wind Generation
# 3.1 Wind gen data frame
wind_international <- international %>%
filter(Technology == 'Wind')
# 3.2 Top 5 wind producers in 2021
top_5_wind <- wind_international %>%
filter(Year == '2021') %>%
arrange(desc(Generation)) %>%
slice(1:5)
# 3.3 Plot of top 5 wind producers
wind_international_plot <-
ggplot(wind_international %>%
filter(Country %in% c(top_5_wind$Country)),
aes(x = Year, y = Generation/1000, group = Country, color = Country)) +
geom_line(linewidth = 1) +
geom_text_repel(
data = top_5_wind,
aes(label = Country),
hjust = 0, nudge_x = 1, direction = "y",
size = 5, segment.color = NA,
family = "Ubuntu") +
scale_x_continuous(
breaks = seq(2000, 2021, 3),
expand = expansion(add = c(1, 5))) +
scale_y_continuous(expand = expand_scale(mult = c(0, 0.05))) +
labs(x = 'Year',
y = 'Generation (TWh)',
title = 'United States is the second highest producer of\nwind energy',
subtitle = 'Top 5 producers of wind energy in 2021',
caption = 'Data from IRENA') +
scale_color_manual(values = c('#719847','#ccca5d','#b2529d','#4c67a5','#a5734c')) +
theme_minimal_hgrid(font_family = 'Ubuntu') +
theme(legend.position = 'none')
ggsave(
filename = here('figs_manu', '3_1_wind_inter_plot.png'),
plot = wind_international_plot,
width = 8,
height = 8 / 1.618
)
# 4. International summary table
sum_international <- international %>%
group_by(Country, Technology) %>%
summarize(`Total Generation (tWh)` =
sum(Generation, na.rm = TRUE)) %>%
rename(`Technology (Solar or Wind)` = Technology)
sum_international_table <- sum_international %>%
reactable(
searchable = TRUE,
highlight = TRUE,
filterable = TRUE,
defaultPageSize = 5,
showPageSizeOptions = TRUE,
pageSizeOptions = c(5, 10, 15)
)
solar_international_plot
wind_international_plot
sum_international_table
# 1. Solar generation data processing
solar_gen_path <- here('data', 'eia_solar_gen_raw.csv')
solar_gen <- read_csv(solar_gen_path, skip = 4) %>%
clean_names() %>%
select('month', 'united_states_thousand_megawatthours') %>%
arrange(month) %>%
mutate(
month = dmy(paste('01', month)),
month = format(month, '%Y-%m'),
year = as.numeric(substr(month, 1, 4))
) %>%
filter(year <= 2022) %>%
group_by(year) %>%
summarise(solar_gen_gwh = sum(united_states_thousand_megawatthours))
write_csv(solar_gen, here('data', 'eia_solar_gen.csv'))
# 2. Wind generation data processing
wind_gen_path <- here('data', 'eia_wind_gen_raw.csv')
wind_gen <- read_csv(wind_gen_path, skip = 4) %>%
clean_names() %>%
select('month', 'united_states_thousand_megawatthours') %>%
arrange(month) %>%
mutate(
month = dmy(paste('01', month)),
month = format(month, '%Y-%m'),
year = as.numeric(substr(month, 1, 4))
) %>%
filter(year <= 2022) %>%
group_by(year) %>%
summarise(wind_gen_gwh = sum(united_states_thousand_megawatthours))
write_csv(wind_gen, here('data', 'eia_wind_gen.csv'))
# 3. Long format & exporting
solar_wind_gen_longer <- inner_join(solar_gen, wind_gen, by = 'year') %>%
pivot_longer(names_to = 'energy_source',
values_to = 'gwh',
cols = c('solar_gen_gwh', 'wind_gen_gwh'))
write_csv(solar_wind_gen_longer,
here('data', 'eia_solar_wind_gen_longer.csv'))
# 4. Plot the Solar and Wind Generation over Time
generation_plot <- solar_wind_gen_longer %>%
ggplot(aes(x = year,
y = gwh / 1000,
color = energy_source)) +
geom_line(linewidth = 1) +
geom_point(fill = 'white', size = 2, shape = 21) +
geom_text(data = subset(solar_wind_gen_longer, year == max(year)),
aes(label = recode(energy_source,
solar_gen_gwh = 'Solar',
wind_gen_gwh = 'Wind')),
nudge_x = -1.15,
nudge_y = 10,
size = 5,
family = "Ubuntu") +
labs(x = 'Year',
y = 'Generation (TWh)',
color = 'Energy Source',
title = 'Wind outpaces solar energy generation in the USA\nevery year',
subtitle = 'Wind generation is more than double solar generation in 2022',
caption = 'Data from EIA') +
scale_x_continuous(breaks = seq(2000, 2022, by = 2)) +
scale_y_continuous(expand = expand_scale(mult = c(0, 0.05))) +
scale_color_manual(values = c('#ccca5d','#b2529d'))+
theme_minimal_hgrid(font_family = 'Ubuntu') +
theme(legend.position = 'none')
ggsave(
filename = here('figs_manu', '3_2_generation_plot.png'),
plot = generation_plot,
width = 7,
height = 7 / 1.618
)
generation_plot
# 1. Read solar and wind cost data and merge to one
solar_cost <- read_csv(here('data', 'irena_solar_cost.csv'))
wind_cost <- read_csv(here('data', 'irena_wind_cost.csv'))
solar_wind_cost <- left_join(solar_cost, wind_cost, by = 'year')
# 2. Pivot to longer format and export to csv
solar_wind_cost_longer <- solar_wind_cost %>%
pivot_longer(names_to = 'energy_source',
values_to = 'cost',
cols = -year)
write_csv(solar_wind_cost_longer,
here('data', 'irena_solar_wind_cost_longer.csv'))
# 3. Create plot of solar and wind costs compared to fossil fuels
cost_plot <- solar_wind_cost_longer %>%
ggplot(aes(x = year,
y = cost,
color = energy_source)) +
annotate('rect',
xmin = -Inf, xmax = Inf,
ymin = 0.05, ymax = 0.177,
fill = 'gray', alpha = 0.3) +
annotate('text',
x = 2013, y = 0.08,
color = 'dimgray',
label = 'The shaded area is the range of fossil fuel costs.',
family = "Ubuntu") +
geom_line(linewidth = 1) +
geom_point(fill = 'white', size = 2, shape = 21) +
geom_text_repel(data = subset(solar_wind_cost_longer,
year == max(year)),
aes(label = recode(energy_source,
solar_cost_usd_per_kwh = 'Solar',
wind_cost_usd_per_kwh = 'Wind')),
nudge_x = 0.5,
nudge_y = 0,
size = 4.5,
family = "Ubuntu") +
labs(x = 'Year',
y = 'Cost (USD/kWh, 2022 prices)',
color = 'Energy Source',
title = 'The cost of solar has significantly decreased in the USA \n becoming cheaper than wind in 2017',
caption = 'Data from IRENA') +
scale_x_continuous(breaks = seq(2010, 2019, by = 1)) +
scale_y_continuous(limits = c(0, 0.4), breaks = c(0.1, 0.2, 0.3, 0.4), expand = expand_scale(mult = c(0, 0.05)), labels = scales::dollar)+
scale_color_manual(values = c('#ccca5d','#b2529d'))+
theme_minimal_hgrid(font_family = 'Ubuntu') +
theme(legend.position = 'none')
ggsave(
filename = here('figs_manu', '3_3_cost_plot.png'),
plot = cost_plot,
width = 7,
height = 7 / 1.618
)
cost_plot
# 1. Scrape and save the raw policy data frame
# 1.1 Read the html from local path
laws_html_path <- here('data', 'dsire_laws_raw.html')
laws_html <- read_html(laws_html_path)
# 1.2 Scrape the table as a df
laws_table <- laws_html %>%
html_nodes('table') %>%
.[[1]] %>%
html_table()
# 1.3 Save the df to csv
write_csv(laws_table, here('data', 'dsire_laws_raw.csv'))
# 2. Construct solar and wind policy data frame
# 2.1 Solar laws
solar_laws <- read_csv(here('data', 'dsire_laws_raw.csv')) %>%
filter(grepl("Solar", Name, ignore.case = TRUE) |
grepl("Solar", `Policy/Incentive Type`, ignore.case = TRUE)) %>%
mutate(Created = as.integer(substr(Created, 7, 10))) %>%
count(Created) %>%
rename(year = Created, count_of_solar_laws = n)
write_csv(solar_laws, here('data', 'dsire_solar_laws.csv'))
# 2.2 Wind laws
wind_laws <- read_csv(here('data', 'dsire_laws_raw.csv')) %>%
filter(grepl("Wind", Name, ignore.case = TRUE) |
grepl("Wind", `Policy/Incentive Type`, ignore.case = TRUE)) %>%
mutate(Created = as.integer(substr(Created, 7, 10))) %>%
count(Created) %>%
rename(year = Created, count_of_wind_laws = n)
write_csv(wind_laws, here('data', 'dsire_wind_laws.csv'))
# 3. Plots of solar and wind laws
# 3.1 Judge if congress is the same party as the president
congress_path <- here('data', 'congress.csv')
congress <- read_csv(congress_path)
# 3.2 Solar laws plot
solar_laws_plot <- solar_laws %>%
left_join(congress, by = 'year') %>%
filter(year <= 2020) %>%
ggplot() +
geom_col(aes(x = year, y = count_of_solar_laws, fill = both)) +
geom_vline(xintercept = 2000.5, color = 'black', linetype = 'solid') +
geom_vline(xintercept = 2008.5, color = 'black', linetype = 'solid') +
geom_vline(xintercept = 2016.5, color = 'black', linetype = 'solid') +
annotate(
'text', x = 2000.35, y = 70, color = '#4c67a5', hjust = 1,
label = 'Clinton', size = 5, family = "Ubuntu") +
annotate(
'text', x = 2005, y = 70, color = '#ad2730', hjust = 1,
label = 'Bush', size = 5, family = "Ubuntu") +
annotate(
'text', x = 2014, y = 70, color = '#4c67a5', hjust = 1,
label = 'Obama', size = 5, family = "Ubuntu") +
annotate(
'text', x = 2019.75, y = 70, color = '#ad2730', hjust = 1,
label = 'Trump', size = 5, family = "Ubuntu") +
geom_label(data = data.frame(x = 2004.5, y = 50, label = 'Both Congress and the \nPresident are of the \nsame political party'),
aes(x = x, y = y, label = label),
size = 3.5, color = '#719847', family = "Ubuntu") +
geom_curve(data = data.frame(x = 2004.5, xend = 2001.5, y = 42, yend = 10),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = 0.1,
arrow = arrow(length = unit(0.01, "npc"), type = "closed")) +
geom_curve(data = data.frame(x = 2004.5, xend = 2005, y = 42, yend = 10),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = -0.1,
arrow = arrow(length = unit(0.01, "npc"), type = "closed")) +
geom_curve(data = data.frame(x = 2004.5, xend = 2009.5, y = 42, yend = 25),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = -0.1,
arrow = arrow(length = unit(0.01, "npc"), type = "closed")) +
scale_x_continuous(breaks = seq(2000, 2020, by = 2),
limits = c(1998, 2020),
expand = expand_scale(mult = c(0, 0.05))) +
scale_y_continuous(breaks = seq(0, 60, by = 10),
limits = c(0 , 70),
expand = expand_scale(mult = c(0, 0.05))) +
scale_fill_manual(values = c('#dbd9d6', '#719847')) +
labs(x = 'Year',
y = 'Number of Solar Policies',
title = 'State solar policies appear to coincide with\ndemocratic presidential terms',
subtitle = 'Alignment of political parties does not appear to impact legislation',
caption = 'Data from DSIRE') +
theme_minimal_hgrid(font_family = 'Ubuntu') +
theme(legend.position = 'none')
ggsave(
filename = here('figs_manu', '3_4_solar_laws_plot.png'),
plot = solar_laws_plot,
width = 7,
height = 7 / 1.618
)
# 3.3 Wind policy plot
wind_laws_plot <- wind_laws %>%
left_join(congress, by = 'year') %>%
filter(year <= 2020) %>%
ggplot() +
geom_col(aes(x = year, y = count_of_wind_laws, fill = both)) +
geom_vline(xintercept = 2000.5, color = 'black', linetype = 'solid') +
geom_vline(xintercept = 2008.5, color = 'black', linetype = 'solid') +
geom_vline(xintercept = 2016.5, color = 'black', linetype = 'solid') +
annotate(
'text', x = 2000.35, y = 70, color = '#4c67a5', hjust = 1,
label = 'Clinton', size = 5, family = "Ubuntu") +
annotate(
'text', x = 2005, y = 70, color = '#ad2730', hjust = 1,
label = 'Bush', size = 5, family = "Ubuntu") +
annotate(
'text', x = 2014, y = 70, color = '#4c67a5', hjust = 1,
label = 'Obama', size = 5, family = "Ubuntu") +
annotate(
'text', x = 2019.75, y = 70, color = '#ad2730', hjust = 1,
label = 'Trump', size = 5, family = "Ubuntu") +
geom_label(data = data.frame(x = 2004.5, y = 50, label = 'Both Congress and the \nPresident are of the \nsame political party'),
aes(x = x, y = y, label = label),
size = 3.5, color = '#719847', family = "Ubuntu") +
geom_curve(data = data.frame(x = 2004.5, xend = 2001.5, y = 42, yend = 10),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = 0.1,
arrow = arrow(length = unit(0.01, "npc"), type = "closed")) +
geom_curve(data = data.frame(x = 2004.5, xend = 2005, y = 42, yend = 10),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = -0.1,
arrow = arrow(length = unit(0.01, "npc"), type = "closed")) +
geom_curve(data = data.frame(x = 2004.5, xend = 2009.5, y = 42, yend = 25),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = -0.1,
arrow = arrow(length = unit(0.01, "npc"), type = "closed")) +
scale_x_continuous(breaks = seq(2000, 2020, by = 2),
limits = c(1998, 2020),
expand = expand_scale(mult = c(0, 0.05))) +
scale_y_continuous(breaks = seq(0, 60, by = 10),
limits = c(0 , 70),
expand = expand_scale(mult = c(0, 0.05))) +
scale_fill_manual(values = c('#dbd9d6', '#719847')) +
labs(x = 'Year',
y = 'Number of Wind Policies',
title = 'State wind policies appear to coincide with\ndemocratic presidential terms',
subtitle = 'Alignment of political parties does not appear to impact legislation',
caption = 'Data from DSIRE') +
theme_minimal_hgrid(font_family = 'Ubuntu') +
theme(legend.position = 'none')
ggsave(
filename = here('figs_manu', '3_4_wind_laws_plot.png'),
plot = wind_laws_plot,
width = 7,
height = 7 / 1.618
)
solar_laws_plot
wind_laws_plot
# Store the wider format for later use
solar_wind_budget_wider <- read_csv(
here('data', 'iea_solar_wind_budget_wider.csv')
)
# Generate the longer format
solar_wind_budget_longer <- read_csv(
here('data', 'iea_solar_wind_budget_wider.csv')
) %>%
pivot_longer(cols = -year,
names_to = 'energy_source',
values_to = 'rdd_millions_usd')
# Prepare the label data for the plot
label_data <- subset(solar_wind_budget_longer, year == max(year))
label_data$nudge <- ifelse(label_data$energy_source ==
'solar_rdd_million_usd', 220, -100)
# Sketch the plot
budget_plot <- solar_wind_budget_longer %>%
ggplot(aes(x = year,
y = rdd_millions_usd,
color = energy_source)) +
geom_line(linewidth = 1) +
geom_text(data = label_data,
aes(label = recode(energy_source,
solar_rdd_million_usd = 'Solar',
wind_rdd_million_usd = 'Wind')),
nudge_x = 1.75,
nudge_y = label_data$nudge, size = 5,
family = "Ubuntu") +
labs(x = 'Year',
y = 'RD&D Budget (Millions, 2022 USD)',
color = 'Energy Source',
title = 'Solar and wind energy RD&D budgets follow\nsimilar trends in the USA',
subtitle = 'Solar budget was higher than wind for all years',
caption = 'Data from IEA') +
scale_x_continuous(limits = c(1970, 2018), breaks = seq(1975, 2015, by = 5)) +
theme_minimal_hgrid(font_family = 'Ubuntu') +
scale_color_manual(values = c('#ccca5d','#b2529d'))+
scale_y_continuous(labels = scales::dollar)+
theme(legend.position = 'none') +
# Carter box
geom_label(data = data.frame(x = 1972.6, y = 990, label = 'President Carter\nannounces\nNational Energy\nProgram'),
aes(x = x, y = y, label = label),
size = 3.5, color = '#719847', color = 'black',
family = "Ubuntu") +
annotate('point', x = 1977 , y = 578.5, color = '#719847', size = 2) +
annotate('point', x = 1977 , y = 83.0, color = '#719847', size = 2) +
geom_curve(
data = data.frame(
x = 1973, xend = 1977, y = 790, yend = 578.5),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = 0.1,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed")) +
geom_curve(
data = data.frame(
x = 1973, xend = 1977, y = 790, yend = 83.0),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = 0.1,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed")) +
# Treasury box
geom_label(data = data.frame(x = 2007, y = 750, label = 'Section 1603\nProgram begins'),
aes(x = x, y = y, label = label),
size = 3.5, color = '#719847', color = 'black',
family = "Ubuntu") +
annotate('point', x = 2009 , y = 531.5, color = '#719847', size = 2) +
annotate('point', x = 2009 , y = 247.7, color = '#719847', size = 2) +
geom_curve(
data = data.frame(
x = 2007, xend = 2009, y = 650, yend = 531.5),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = 0.1,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed")) +
geom_curve(
data = data.frame(
x = 2007, xend = 2009, y = 650, yend = 247.7),
mapping = aes(x = x, xend = xend, y = y, yend = yend),
color = '#719847', size = 0.5, curvature = 0.1,
arrow = arrow(length = unit(0.01, "npc"),
type = "closed"))
ggsave(
filename = here('figs_manu', '3_5_budget_plot.png'),
plot = budget_plot,
width = 7,
height = 7 / 1.618
)
budget_plot
# 1. Merge and save solar and wind gen
merged_solar_wind <- solar_cost %>%
full_join(solar_gen, by = 'year') %>%
full_join(solar_laws, by = 'year') %>%
full_join(solar_wind_budget_wider, by = 'year') %>%
full_join(wind_cost, by = 'year') %>%
full_join(wind_gen, by = 'year') %>%
full_join(wind_laws, by = 'year') %>%
arrange(year)
write_csv(merged_solar_wind, here('data', 'merged_solar_wind.csv'))
# 2. Correlation coefficients between solar generation and other solar related variables:
# cost (solar_cost_usd_per_kwh), laws (count_of_solar_laws), and budget(solar_rdd_million_usd)
cor_solar_gen_solar_cost <- cor(
merged_solar_wind$solar_gen_gwh,
merged_solar_wind$solar_cost_usd_per_kwh,
use = 'pairwise.complete.obs',
method = 'spearman'
)
cor_solar_gen_solar_laws <- cor(
merged_solar_wind$solar_gen_gwh,
merged_solar_wind$count_of_solar_laws,
use = 'pairwise.complete.obs',
method = 'spearman'
)
cor_solar_gen_solar_rdd <- cor(
merged_solar_wind$solar_gen_gwh,
merged_solar_wind$solar_rdd_million_usd,
use = 'pairwise.complete.obs',
method = 'spearman'
)
# 3. Correlation coefficients for wind generation and other wind variables
cor_wind_gen_wind_cost <- cor(
merged_solar_wind$wind_gen_gwh,
merged_solar_wind$wind_cost_usd_per_kwh,
use = 'pairwise.complete.obs',
method = 'spearman'
)
cor_wind_gen_wind_laws <- cor(
merged_solar_wind$wind_gen_gwh,
merged_solar_wind$count_of_wind_laws,
use = 'pairwise.complete.obs',
method = 'spearman'
)
cor_wind_gen_wind_rdd <- cor(
merged_solar_wind$wind_gen_gwh,
merged_solar_wind$wind_rdd_million_usd,
use = 'pairwise.complete.obs',
method = 'spearman'
)
# 4. Create dataframe of correlations
cor_solar_wind <- data.frame(value_1 = c('Solar', 'Solar', 'Solar',
'Wind','Wind', 'Wind'),
value_2 = c('Cost', 'Laws', 'Budgets',
'Cost', 'Laws', 'Budgets'),
cor = c(cor_solar_gen_solar_cost,
cor_solar_gen_solar_laws,
cor_solar_gen_solar_rdd,
cor_wind_gen_wind_cost,
cor_wind_gen_wind_laws,
cor_wind_gen_wind_rdd))
write_csv(cor_solar_wind, here('data', 'cor_solar_wind.csv'))
# 5. Summary table of the correlations
cor_solar_wind %>%
mutate(cor = round(cor, 2),
cor = cell_spec(
cor, 'html',
color = ifelse(
cor > 0.5 | cor < -0.5, 'white', 'black'
),
background = ifelse(
cor > 0.5 | cor < -0.5, 'cornflowerblue', 'white'
))) %>%
kable(format = 'html',
escape = FALSE,
align = c('l', 'l', 'l'),
col.names = c('Energy Type', 'Compared Variable', 'Correlation'),
caption = 'Correlations of Solar/Wind Generation vs the Other 3 Variables') %>%
kable_styling(bootstrap_options = c('striped', 'hover'))
# 1. Merge and save datasets
# 1.1 Solar data
merged_solar <- solar_cost %>%
full_join(solar_gen, by = 'year') %>%
full_join(solar_laws, by = 'year') %>%
full_join(solar_wind_budget_wider %>%
select(-wind_rdd_million_usd),
by = 'year') %>%
arrange(year) %>%
mutate(log_solar_cost = log(solar_cost_usd_per_kwh),
log_solar_gen = log(solar_gen_gwh))
write_csv(merged_solar, here('data', 'merged_solar.csv'))
# 1.2 Wind data
merged_wind <- wind_cost %>%
full_join(wind_gen, by = 'year') %>%
full_join(wind_laws, by = 'year') %>%
full_join(solar_wind_budget_wider %>%
select(-solar_rdd_million_usd),
by = 'year') %>%
arrange(year) %>%
mutate(log_wind_cost = log(wind_cost_usd_per_kwh),
log_wind_gen = log(wind_gen_gwh))
write_csv(merged_wind, here('data', 'merged_wind.csv'))
# 2. Correlation matrix for merged data
# 2.1 Solar correlation matrix
cor_matrix_solar <- cor(
merged_solar[, sapply(merged_solar, is.numeric)],
use = 'pairwise.complete.obs',
method = 'spearman'
) %>%
as.data.frame()
# 2.2 Wind correlation matrix
cor_matrix_wind <- cor(
merged_wind[, sapply(merged_wind, is.numeric)],
use = 'pairwise.complete.obs',
method = 'spearman'
) %>%
as.data.frame()
# 3. Filter 'high' correlations with threshold of +/- 0.8
# 3.1 High solar correlations
cor_solar_high <- cor_matrix_solar %>%
mutate(value_1 = row.names(.)) %>%
gather(value_2, cor, -value_1) %>%
filter(value_1 != value_2,
cor > 0.8 | cor < -0.8) %>%
mutate(min_var = pmin(value_1, value_2),
max_var = pmax(value_1, value_2)) %>%
distinct(min_var, max_var, .keep_all = TRUE) %>%
select(-min_var, -max_var) %>%
arrange(value_1) %>%
filter((startsWith(value_1, "log") & startsWith(value_2, "log")) |
(!startsWith(value_1, "log") & !startsWith(value_2, "log")))
write_csv(cor_solar_high, here('data', 'cor_solar_high.csv'))
# 3.2 High wind correlations
cor_wind_high <- cor_matrix_wind %>%
mutate(value_1 = row.names(.)) %>%
gather(value_2, cor, -value_1) %>%
filter(value_1 != value_2,
cor > 0.8 | cor < -0.8) %>%
mutate(min_var = pmin(value_1, value_2),
max_var = pmax(value_1, value_2)) %>%
distinct(min_var, max_var, .keep_all = TRUE) %>%
select(-min_var, -max_var) %>%
arrange(value_1) %>%
filter((startsWith(value_1, "log") & startsWith(value_2, "log")) |
(!startsWith(value_1, "log") & !startsWith(value_2, "log")))
write_csv(cor_wind_high, here('data', 'cor_wind_high.csv'))
# 4. Construct the 'high' correlation matrix table
# There are 4 in solar and 3 in wind
# Excluding time series, there are 2 in solar and 1 in wind
cor_matrix_high <- tibble(
`No.` = seq(1,7),
`Value 1` = c("Solar Cost", "Solar Gen", "log Solar Gen", "Solar RDD",
"Wind Cost", "Wind Gen", "log Wind Gen"),
`Value 2` = c("Year", "Year", "log Solar Cost", "Solar Cost",
"Year", "Year", "log Wind Cost"),
Correlation = c(round(cor_solar_high[2,3], 2),
round(cor_solar_high[3,3], 2),
round(cor_solar_high[1,3], 2),
round(cor_solar_high[5,3], 2),
round(cor_wind_high[2,3], 2),
round(cor_wind_high[3,3], 2),
round(cor_wind_high[1,3], 2))
)
cor_matrix_high_kable <- cor_matrix_high %>% kable(
format = 'html', escape = FALSE,
align = c('c', 'l', 'l', 'c'),
caption = 'Correlations Matrix of High Correlations') %>%
kable_styling(bootstrap_options = c('striped', 'hover'))
cor_matrix_high_kable
# Plot 1: Log Solar Cost vs Log Solar Generation
cor_plot_1 <- merged_solar_wind %>%
filter(!is.na(solar_cost_usd_per_kwh) & !is.na(solar_gen_gwh)) %>%
ggplot(aes(x = log(solar_cost_usd_per_kwh),
y = log(solar_gen_gwh / 1000))) +
geom_point() +
geom_smooth(method = 'lm', se = FALSE, size = 0.5,
color = 'cornflowerblue') +
labs(x = 'Solar Cost: log(USD/kWh)',
y = 'Solar Generation: log(TWh)',
title='Log Solar Cost vs Log Solar Generation',
subtitle = 'Correlation = -1') +
theme_minimal_grid(font_family = 'Ubuntu')
ggsave(
filename = here('figs_manu', '3_6_cor_plot_1.png'),
plot = cor_plot_1,
width = 6,
height = 6 / 1.618
)
# Plot 2: log Wind Cost vs log Wind Generation
cor_plot_2 <- merged_solar_wind %>%
filter(!is.na(wind_cost_usd_per_kwh) & !is.na(wind_gen_gwh)) %>%
ggplot(aes(x = log(wind_cost_usd_per_kwh),
y = log(wind_gen_gwh / 1000))) +
geom_point() +
geom_smooth(method = 'lm', se = FALSE, size = 0.5,
color = 'cornflowerblue') +
labs(x = 'Wind Cost: log(USD/kWh)',
y = 'Wind Generation: log(TWh)',
title='Log Wind Cost vs Log Wind Generation',
subtitle = 'Correlation = -0.81') +
theme_minimal_grid(font_family = 'Ubuntu')
ggsave(
filename = here('figs_manu', '3_6_cor_plot_2.png'),
plot = cor_plot_2,
width = 6,
height = 6 / 1.618
)
# Plot 3: Solar RD&D vs Solar Cost
cor_plot_3 <- merged_solar_wind %>%
filter(!is.na(solar_rdd_million_usd) &
!is.na(solar_cost_usd_per_kwh)) %>%
ggplot(aes(x = solar_rdd_million_usd,
y = solar_cost_usd_per_kwh)) +
geom_point() +
geom_smooth(method = 'lm', se = FALSE, size = 0.5,
color = 'cornflowerblue') +
labs(x = 'Solar RD&D (USD)',
y = 'Solar Cost (USD / kWh)',
title='Solar RD&D vs Solar Cost',
subtitle = 'Correlation = 0.89') +
theme_minimal_grid(font_family = 'Ubuntu')
ggsave(
filename = here('figs_manu', '3_6_cor_plot_3.png'),
plot = cor_plot_3,
width = 6,
height = 6 / 1.618
)
cor_plot_1
cor_plot_2
cor_plot_3
# 1. Calculate the summary and export to csv
summary_solar_gen <- summary(solar_gen$solar_gen_gwh)
summary_solar_cost <- summary(solar_cost$solar_cost_usd_per_kwh)
summary_solar_laws <- summary(solar_laws$count_of_solar_laws)
summary_solar_budget <- summary(solar_wind_budget_wider$solar_rdd_million_usd)
summary_solar_df <- data.frame(
tier = names(summary_solar_gen),
solar_gen_gwh = round(as.numeric(summary_solar_gen)),
solar_cost_usd_per_kw = round(as.numeric(summary_solar_cost), 2),
solar_laws_count = round(as.numeric(summary_solar_laws)),
solar_budget_million_usd = round(as.numeric(summary_solar_budget))
)
write_csv(summary_solar_df, here('data', 'summary_solar.csv'))
# 2. Generate the table using kable()
summary_solar_df %>%
kable(format = 'html',
escape = FALSE,
col.names = c('Tier',
'Electricity Generation (GWh)',
'Cost (USD/kWh)',
'Laws Count',
'RD&D Budget (Million USD)'),
caption = 'Summary of Solar') %>%
kable_styling(bootstrap_options = c('striped', 'hover'))
# 1. Calculate the summary and export to csv
summary_wind_gen <- summary(wind_gen$wind_gen_gwh)
summary_wind_cost <- summary(wind_cost$wind_cost_usd_per_kwh)
summary_wind_cost <- summary_wind_cost[!names(summary_wind_cost) == "NA's"]
summary_wind_laws <- summary(wind_laws$count_of_wind_laws)
summary_wind_budget <- summary(solar_wind_budget_wider$wind_rdd_million_usd)
summary_wind_df <- data.frame(
tier = names(summary_wind_gen),
wind_gen_gwh = round(as.numeric(summary_wind_gen)),
wind_cost_usd_per_kw = round(as.numeric(summary_wind_cost), 2),
wind_laws_count = round(as.numeric(summary_wind_laws)),
wind_budget_million_usd = round(as.numeric(summary_wind_budget))
)
write_csv(summary_wind_df, here('data', 'summary_wind.csv'))
# 2. Generate the table using kable()
summary_wind_df %>%
kable(format = 'html',
escape = FALSE,
col.names = c('Tier',
'Electricity Generation (GWh)',
'Cost (USD/kWh)',
'Laws Count',
'RD&D Budget (Million USD)'),
caption = 'Summary of Wind') %>%
kable_styling(bootstrap_options = c('striped', 'hover'))
dict_cor <- data.frame(
name = c('cor_matrix_high',
'cor_matrix_solar',
'cor_matrix_wind',
'cor_solar_wind',
'cor_solar_high',
'cor_wind_high'),
type = c('A tibble: 7 × 4',
'A data frame: 6 × 6',
'A data frame: 6 × 6',
'A data frame: 8 × 3',
'A data frame: 5 × 3',
'A data frame: 2 × 3'),
description = c('Correlation matrix of high correlation values',
'Correlation matrix of all solar related variables',
'Correlation matrix of all wind related variables',
'Correlation between generation and 3 other variables',
'Solar related correlations higher than 0.8',
'Wind related correlations higher than 0.8')
) %>%
mutate(count = row_number()) %>%
select(count, everything())
dict_cor %>%
kable(format = 'html',
escape = FALSE,
align = c('c', 'l', 'l', 'l'),
col.names = c('No.', 'Name', 'Type', 'Description'),
caption = 'Dictionary of Correlations') %>%
kable_styling(bootstrap_options = c('striped', 'hover'))
dict_file <- data.frame(
name = c('congress',
'cor_solar_wind',
'dsire_laws_raw',
'dsire_solar_laws',
'dsire_wind_laws',
'eia_solar_gen_raw',
'eia_solar_gen',
'eia_solar_wind_gen_longer',
'eia_wind_gen_raw',
'eia_wind_gen',
'cor_solar_high',
'cor_wind_high',
'iea_solar_wind_budget_wider',
'irena_solar_cost',
'irena_solar_wind_cost_longer',
'irena_solar_wind_international_long',
'irena_solar_wind_international_raw',
'irena_wind_cost',
'merged_solar_wind',
'merged_solar',
'merged_wind',
'summary_solar',
'summary_wind',
'dsire_laws_raw',
'irena_costs_raw'),
type = c('CSV', 'CSV', 'CSV', 'CSV', 'CSV',
'CSV',