# Scrape pages 1-10 of the Aviation Safety Network country listing and keep
# only the rows whose operator matches one of the airlines of interest.
base_url <- "https://asn.flightsafety.org/database/countries/N/"
airlines_to_keep <- c("Delta Air Lines", "American Airlines", "Jetblue Airways")

# A single case-insensitive alternation built from `airlines_to_keep`, so the
# airline list lives in exactly one place instead of being repeated in the
# filter.
airline_pattern <- paste(airlines_to_keep, collapse = "|")

# Read listing page `page_number`, take the first HTML table on it, and return
# the rows for the airlines of interest with every column as character.
scrape_page <- function(page_number) {
  page <- read_html(paste0(base_url, page_number))
  table_data <- page %>%
    html_nodes("table") %>%
    html_table(fill = TRUE)
  table_data <- table_data[[1]]
  colnames(table_data) <- make.names(colnames(table_data), unique = TRUE)
  table_data %>%
    # Presumably coerced to character so pages whose columns parse as
    # different types can still be row-bound -- TODO confirm.
    mutate(across(everything(), as.character)) %>%
    filter(grepl(airline_pattern, operator, ignore.case = TRUE))
}

# Collect the per-page tables in a list and bind once, rather than growing a
# data frame inside a loop.
all_data <- bind_rows(lapply(seq_len(10), scrape_page))
# Count accidents per operator and damage code, spread the damage codes into
# columns, and derive cumulative totals.

# NOTE(review): columns are dropped by position; confirm that 7, 9 and 10 are
# still the intended columns if the scraped table layout changes.
data <- all_data %>%
  select(-7, -9, -10)

# One row per (operator, damage code) with the number of records.
data_counts <- data %>%
  group_by(operator, dmg) %>%
  summarize(count = n(), .groups = "drop")

# One column per damage code; combinations that never occur are filled with 0.
# NOTE(review): select(-5) drops the fifth column by position -- verify which
# damage code that is.
data_long <- data_counts %>%
  pivot_wider(names_from = dmg, values_from = count, values_fill = 0) %>%
  select(-5)

data <- data_long %>%
  rename(
    Minor = min,
    None = non,
    Substantial = sub
  )

data <- data %>%
  # Combine "JetBlue Airways" and "Jetblue Airways" into a single operator.
  group_by(
    operator = ifelse(operator == "Jetblue Airways", "JetBlue Airways", operator)
  ) %>%
  summarize(
    Minor = sum(Minor),
    None = sum(None),
    Substantial = sum(Substantial)
  ) %>%
  mutate(
    operator = fct_reorder(operator, None),
    # Running totals in the order None -> Substantial -> Minor.
    Cumulative_None = None,
    Cumulative_Substantial = None + Substantial,
    Cumulative_Minor = None + Substantial + Minor
  )
# Scrape the first table on the Simple Flying "top 10 domestic airlines" page
# and keep the daily-flight figures for three of its rows.
url <- "https://simpleflying.com/usa-top-10-domestic-airlines/"
webpage <- read_html(url)

airline_data <- webpage %>%
  html_table(fill = TRUE) %>%
  .[[1]] %>%
  # NOTE(review): rows are picked by position (1, 2 and 7); confirm these are
  # still the intended airlines if the source table changes.
  filter(row_number() %in% c(1, 2, 7)) %>%
  clean_names() %>%
  mutate(
    # Strip thousands separators before converting to numeric.
    avg_daily_domestic_flights_march =
      as.numeric(gsub(",", "", avg_daily_domestic_flights_march))
  ) %>%
  select(-3, -4, -5, -6)
# Attach the daily-flight figures to the accident counts and convert the
# cumulative counts into per-flight rates.
# NOTE(review): bind_cols() pairs rows purely by position, so this relies on
# `data` and `airline_data` listing the airlines in the same order -- confirm.
combined_data_safety <- bind_cols(data, airline_data) %>%
  mutate(
    # 5110 = 365 * 14; presumably the number of days in the 2010-2024 window
    # used in the chart subtitle -- TODO confirm.
    flights = (avg_daily_domestic_flights_march) * 5110,
    none_per_flight = Cumulative_None / flights,
    minor_per_flight = Cumulative_Minor / flights,
    sub_per_flight = Cumulative_Substantial / flights
  )
# Accidents per flight by damage type. The three cumulative series are drawn
# on top of each other (largest first), so the visible bands read as a stacked
# bar per operator.
ggplot(combined_data_safety, aes(x = operator)) +
  geom_col(aes(y = minor_per_flight, fill = "Minor")) +
  geom_col(aes(y = sub_per_flight, fill = "Substantial")) +
  geom_col(aes(y = none_per_flight, fill = "None")) +
  # In-bar labels replace the legend, which is hidden below.
  geom_text(
    aes(
      y = minor_per_flight / 6 + sub_per_flight,
      label = ifelse(minor_per_flight > 0, "Minor", "")
    ),
    size = 3, color = "black"
  ) +
  geom_text(
    aes(
      y = sub_per_flight * 8 / 9,
      label = ifelse(sub_per_flight > 0, "Substantial", "")
    ),
    size = 3, color = "black"
  ) +
  geom_text(
    aes(
      y = none_per_flight / 2,
      label = ifelse(none_per_flight > 0, "None", "")
    ),
    size = 3, color = "black"
  ) +
  labs(
    title = "Accidents by Damage Type",
    subtitle = "2010-2024",
    x = " ",
    y = "Accidents Per Flight"
  ) +
  scale_fill_manual(
    values = c("Minor" = "#FFA500", "None" = "#1E90FF", "Substantial" = "#FF0000")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = "none")
Air Sciencia
Evaluating Airline Performance
Introduction
When traveling between NYC’s JFK airport and LA’s LAX, passengers often choose from three major airlines: American, Delta, and JetBlue. These carriers are evaluated on key factors such as on-time performance, safety, number of complaints, and customer satisfaction, all of which shape the passenger experience and define airline reliability.
On-time performance is essential, as delays can significantly impact schedules and travel plans. Safety is non-negotiable, since it is critical for passenger trust and airline reputation. Complaints matter because they shape an airline’s reputation when customers consider traveling with that airline, and customer satisfaction reflects the quality of service that passengers can expect. This project takes a close look at key aspects of these factors to provide an exploratory analysis of airline reliability. Our aim is to examine how these elements are measured and whether current methodologies accurately reflect and account for uncontrollable factors like delays and cancellations, which have become commonplace within the airline industry. By analyzing these trends, the project aims to gain a clearer understanding of the airline industry’s challenges and of how these aspects influence company ratings, offering insights into what truly shapes reliability from both operational and passenger perspectives.
Research Question: So How are we going to help?
We wanted to evaluate how the top 10 domestic airlines that fly between JFK and LAX compare to each other in on-time performance, safety, and customer satisfaction between July 2023 and July 2024.
Our aim is to discover which airline we would recommend as the safest and least delay-ridden, with the highest customer satisfaction.
Methodology
Data Selection
For our safety data we used data from the Aviation Safety Network, which can be viewed here: https://asn.flightsafety.org/database/countries/N/
The Aviation Safety Network is part of the Flight Safety Foundation, which provides up-to-date information on airplane accidents and incidents. Our data was derived from their Accident Database, which is updated daily and contains information dating back to 1919. They get the information on these incidents from official governmental agencies, such as air accident investigation boards and civil aviation authorities. This is a very valid source, as there is no reason for it to be biased towards any airline or company: it simply compiles reports from government agencies into easily accessible data.
For our customer satisfaction data we used two different data sources.
https://simpleflying.com/usa-top-10-domestic-airlines/ came from ___ . It can be trusted because it is from ____ . After downloading the data, we will isolate ___ to try to determine ____ .
On-time performance
Airline_Delay_Cause.csv
BTS | OT Delay
This data comes from the Bureau of Transportation Statistics(BTS). This should be considered reliable source data as it is a branch of the Department of Transportation, which is a government agency, and its whole purpose is to produce data about aviation as well as other modes of transportation that can be viewed and used by people.
June 2023 Consumer Submission Data.pdf
This data also comes from a branch of the Department of Transportation(DOT), the U.S. Department of Transportation’s Office of Aviation Consumer Protection. This agency is committed to making sure customers are safe and protected while flying. This makes them the most reliable possible source when it comes to complaints from these customers as any complaints about any airlines will be handled by them to ensure passengers are being treated well and fairly.
The USA’s Top 10 Domestic Airlines By Daily Flights
This was created by Simple Flying, which I had not heard of before, but which is a website that posts aviation news. The fact that it isn’t a well-known publication made me worry about its data, but after cross-referencing with other websites, they all seem to have roughly the same numbers, and it cites its data from OAG, which is used heavily by companies in the aviation industry. Simple Flying compiled the data into a table that I could web scrape, which is why it was chosen over others for our uses.
Results
How do the airlines compare in safety?
Most people will agree that safety is and should be the number one concern for airlines. After all, they operate ginormous machinery with hundreds of people’s lives at risk in each one of the millions of operations they perform every year. We dove right in to analyze this:
Visualization 1
The chart shows that in general planes are extremely safe, as you have such a small chance of having any kind of accident and an even smaller chance of being in a substantial accident where there are any injuries or fatalities. There is no clear best or worst here, as they are all doing very well at minimizing crashes. If a best had to be chosen, it would be American Airlines, as they have slightly fewer accidents per flight.
Visualization 2
# Build per-airline complaint totals from `raw_data_2`, then relate them to
# the number of flights.
customer <- raw_data_2 %>%
  # Drop rows 1 and 12 (presumably header/total rows -- TODO confirm).
  slice(-1, -12) %>%
  mutate(
    # Complaints = sum of the two numeric columns.
    Column2 = as.numeric(Column2) + as.numeric(Column3)
  ) %>%
  select(-Column3) %>%
  mutate(
    Column1 = fct_reorder(Column1, Column2)
  ) %>%
  # NOTE(review): airlines are picked by row position (3, 4 and 7); confirm
  # against the source table.
  filter(row_number() %in% c(3, 4, 7)) %>%
  rename(
    Airline = Column1,
    Complaints = Column2
  )

# NOTE(review): bind_cols() pairs rows purely by position, so `airline_data`
# and `customer` must list the airlines in the same order -- confirm.
combined_data_complaints <- bind_cols(airline_data, customer) %>%
  select(Airline, avg_daily_domestic_flights_march, Complaints) %>%
  # Scale daily flights by 60; presumably the number of days in the two months
  # named in the plot subtitle -- TODO confirm.
  mutate(
    avg_daily_domestic_flights_march = avg_daily_domestic_flights_march * 60
  ) %>%
  mutate(
    complaints_per_flight = Complaints / avg_daily_domestic_flights_march
  ) %>%
  mutate(Airline = fct_reorder(Airline, complaints_per_flight))
# Horizontal bar chart of complaints per flight, one bar per airline, with a
# fixed fill colour per airline and the legend hidden.
ggplot(
  combined_data_complaints,
  aes(x = complaints_per_flight, y = Airline, fill = Airline)
) +
  geom_col() +
  labs(
    title = "Airline Customer Satisfaction",
    subtitle = "June 2022, June 2023",
    x = "Complaints per Flight",
    y = ""
  ) +
  scale_fill_manual(
    values = c(
      "JETBLUE AIRWAYS" = "#003876",
      "DELTA AIR LINES NETWORK" = "#C01933",
      "AMERICAN AIRLINES NETWORK" = "#0D73B1"
    )
  ) +
  theme(
    plot.subtitle = element_text(size = 10)
  ) +
  guides(fill = "none")
The chart shows that there are very few complaints in comparison to how many flights there are across the board, since all three airlines receive fewer than 0.02 complaints per flight. This makes sense because the data comes from the DOT, so only the most serious complaints make it there. Regardless, it is clear from the chart that JetBlue gets the most complaints by far: it had more than double the rate of both American and Delta. This makes JetBlue easily the worst for customer satisfaction, while there isn’t a clear best, as both Delta and American have similarly low complaint rates.
Visualization 3
# Total carrier-caused delay at JFK and LAX per airline, from `raw_data_1`.
delay <- raw_data_1 %>%
  filter(airport %in% c("JFK", "LAX")) %>%
  select(carrier_name, airport, carrier_delay) %>%
  group_by(carrier_name) %>%
  summarize(carrier_delay = sum(carrier_delay, na.rm = TRUE)) %>%
  mutate(
    carrier_name = fct_reorder(carrier_name, carrier_delay)
  ) %>%
  rename(airline = carrier_name) %>%
  # Only keep Delta, JetBlue, and American by excluding every other carrier.
  filter(
    !airline %in% c(
      "Alaska Airlines Network", "Allegiant Air", "Endeavor Air Inc.",
      "Frontier Airlines", "Hawaiian Airlines Network", "Horizon Air",
      "Republic Airline", "SkyWest Airlines Inc.", "Southwest Airlines",
      "Spirit Airlines", "United Air Lines Network"
    )
  )

# NOTE(review): bind_cols() pairs rows purely by position, so `delay` and
# `airline_data` must list the airlines in the same order -- confirm.
combined_delay <- bind_cols(delay, airline_data) %>%
  # Drop the third column, then restore the name that bind_cols() changed to
  # `airline...1` when de-duplicating column names.
  select(-3) %>%
  rename(airline = airline...1) %>%
  # Daily flights -> flights per year.
  mutate(
    avg_daily_domestic_flights_march = avg_daily_domestic_flights_march * 365
  ) %>%
  mutate(
    delays_per_flight = (carrier_delay / avg_daily_domestic_flights_march)
  )
# Horizontal bar chart of average delay per flight, one bar per airline, with
# a fixed fill colour per airline and the legend hidden.
ggplot(combined_delay, aes(x = delays_per_flight, y = airline, fill = airline)) +
  geom_col() +
  labs(
    title = "Average Delay per Flight",
    subtitle = "July 2023- July 2024",
    x = "Delay Time",
    y = ""
  ) +
  scale_x_continuous(labels = label_comma()) +
  scale_fill_manual(
    values = c(
      "JetBlue Airways" = "#003876",
      "Delta Air Lines Network" = "#C01933",
      "American Airlines Network" = "#0D73B1"
    )
  ) +
  theme(
    plot.subtitle = element_text(size = 10)
  ) +
  guides(fill = "none")
The chart shows the average amount of delay per flight across all flights, including those that were not delayed, so the typical delay when one does occur is obviously longer than the few minutes shown. The figure is the total time of all the delays in a year divided by the total number of flights in that same period. It is clear from the chart that JetBlue is atrocious when it comes to delays. It has more than 4 times the delay of Delta and American, at an average of about 2 minutes per flight, while they’re both at less than half a minute.