# Global chunk options: keep warnings and messages raised by the code
# chunks out of the rendered report.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Data cleaning and joins ----
# Step 1: read the raw inputs. Every file lives in the project's data_raw/
# folder, located through here::here().

# Happiness survey workbook.
happiness <- read_excel(path = here::here("data_raw", "happinessdata.xls"))

# Income-inequality table (wide: one column per year).
income_inequality <- read_csv(file = here::here("data_raw", "gini.csv"))

# The two PPP workbooks: the "Data" sheet is read with its first three rows
# skipped (presumably preamble above the header row -- confirm against the
# files).
PPP <- read_excel(
  path = here::here("data_raw", "PPP.xls"),
  sheet = "Data",
  skip = 3
)
PPPP <- read_excel(
  path = here::here("data_raw", "PPPP.xls"),
  sheet = "Data",
  skip = 3
)

# Country metadata sheet stored in the same workbook as PPP.
counrty_region <- read_excel(
  path = here::here("data_raw", "PPP.xls"),
  sheet = "Metadata - Countries"
)
# Step 2: standardise column names ----
# clean_names() rewrites the spreadsheet headers into the lower-case,
# underscore-separated names that the selections below refer to.
happiness <- clean_names(happiness)

# Country metadata: same header clean-up, then drop the two columns that
# are not used.
counrty_region_clean <- counrty_region %>%
  clean_names() %>%
  select(-c(special_notes, table_name))

# Happiness measures kept for the analysis. Rows without a life_ladder
# score are removed.
happiness_clean <- happiness %>%
  select(
    country_name,
    year,
    life_ladder,
    log_gdp_per_capita,
    social_support,
    healthy_life_expectancy_at_birth,
    freedom_to_make_life_choices,
    generosity,
    perceptions_of_corruption,
    positive_affect,
    negative_affect,
    confidence_in_national_government,
    democratic_quality
  ) %>%
  filter(!is.na(life_ladder))
# Step 3: reshape the PPP workbooks from wide to long ----
# PPP and PPPP are cleaned with exactly the same steps, so the shared
# pipeline lives in one helper instead of two copies.
#
# tidy_indicator(wide_tbl)
#   wide_tbl: a table as read from a "Data" sheet, with one column per
#             year named "1960" ... "2019" plus indicator_name /
#             indicator_code columns (after header clean-up).
#   Returns:  a long table with one row per remaining identifier
#             combination and year, restricted to years after 2005 and to
#             non-missing readings. The reading is stored in a column
#             named `value`; callers rename it.
tidy_indicator <- function(wide_tbl) {
  wide_tbl %>%
    gather(key = "year", value = "value", "1960":"2019") %>%
    # Column names arrive as text; convert before the numeric comparison.
    mutate(year = as.numeric(year)) %>%
    filter(year > 2005) %>%
    clean_names() %>%
    select(-indicator_name, -indicator_code) %>%
    filter(!is.na(value))
}

#glimpse(PPP)
PPP_clean <- PPP %>%
  tidy_indicator() %>%
  rename(ppp = value)
#glimpse(PPP_clean)

PPPP_clean <- PPPP %>%
  tidy_indicator() %>%
  rename(pppp = value)
#glimpse(PPPP_clean)
# income inequality: inspect the raw wide table (one column per year)
# before it is reshaped and joined with the happiness data further down
glimpse(income_inequality)
## Observations: 195
## Variables: 242
## $ country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola"…
## $ `1800` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1801` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1802` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1803` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1804` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1805` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1806` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1807` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1808` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1809` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1810` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1811` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1812` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1813` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1814` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1815` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1816` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1817` <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.6, 31.5, 38.7, 33…
## $ `1818` <dbl> 30.5, 38.9, 56.3, 40.0, 57.1, 40.0, 47.5, 31.5, 38.7, 33…
## $ `1819` <dbl> 30.5, 38.9, 56.4, 40.0, 56.9, 40.0, 47.3, 31.5, 38.7, 33…
## $ `1820` <dbl> 30.5, 38.9, 56.5, 40.0, 56.8, 40.0, 47.1, 31.5, 38.7, 32…
## $ `1821` <dbl> 30.5, 38.9, 56.6, 40.0, 56.6, 40.0, 46.8, 31.5, 38.7, 32…
## $ `1822` <dbl> 30.5, 38.9, 56.7, 40.0, 56.4, 40.0, 46.5, 31.5, 38.7, 32…
## $ `1823` <dbl> 30.5, 38.9, 56.8, 40.0, 56.1, 40.0, 46.2, 31.5, 38.7, 32…
## $ `1824` <dbl> 30.5, 38.9, 56.9, 40.0, 55.9, 40.0, 45.8, 31.5, 38.7, 31…
## $ `1825` <dbl> 30.5, 38.9, 57.0, 40.0, 55.7, 40.0, 45.6, 31.5, 38.7, 31…
## $ `1826` <dbl> 30.5, 38.9, 57.2, 40.0, 55.4, 40.0, 45.2, 31.5, 38.7, 31…
## $ `1827` <dbl> 30.5, 38.9, 57.4, 40.0, 55.1, 40.0, 44.8, 31.5, 38.7, 31…
## $ `1828` <dbl> 30.5, 38.9, 57.5, 40.0, 54.7, 40.0, 44.3, 31.5, 38.7, 30…
## $ `1829` <dbl> 30.5, 38.9, 57.7, 40.0, 54.4, 40.0, 43.9, 31.5, 38.7, 30…
## $ `1830` <dbl> 30.5, 38.9, 57.9, 40.0, 54.1, 40.0, 43.4, 31.5, 38.7, 29…
## $ `1831` <dbl> 30.5, 38.9, 58.1, 40.0, 53.7, 40.0, 42.9, 31.5, 38.7, 29…
## $ `1832` <dbl> 30.5, 38.9, 58.2, 40.0, 53.4, 40.0, 42.5, 31.5, 38.7, 29…
## $ `1833` <dbl> 30.5, 38.9, 58.4, 40.0, 53.1, 40.0, 42.0, 31.5, 38.7, 28…
## $ `1834` <dbl> 30.5, 38.9, 58.6, 40.0, 52.7, 40.0, 41.6, 31.5, 38.7, 28…
## $ `1835` <dbl> 30.5, 38.9, 58.8, 40.0, 52.4, 40.0, 41.1, 31.5, 38.7, 28…
## $ `1836` <dbl> 30.5, 38.9, 58.9, 40.0, 52.1, 40.0, 40.7, 31.5, 38.7, 27…
## $ `1837` <dbl> 30.5, 38.9, 59.1, 40.0, 51.7, 40.0, 40.2, 31.5, 38.7, 27…
## $ `1838` <dbl> 30.5, 38.9, 59.3, 40.0, 51.4, 40.0, 39.8, 31.5, 38.7, 27…
## $ `1839` <dbl> 30.5, 38.9, 59.5, 40.0, 51.1, 40.0, 39.3, 31.5, 38.7, 26…
## $ `1840` <dbl> 30.5, 38.9, 59.6, 40.0, 50.7, 40.0, 38.9, 31.5, 38.7, 26…
## $ `1841` <dbl> 30.5, 38.9, 59.8, 40.0, 50.4, 40.0, 38.4, 31.5, 38.7, 26…
## $ `1842` <dbl> 30.5, 38.9, 60.0, 40.0, 50.1, 40.0, 38.0, 31.5, 38.7, 25…
## $ `1843` <dbl> 30.5, 38.9, 60.2, 40.0, 49.7, 40.0, 37.5, 31.5, 38.7, 25…
## $ `1844` <dbl> 30.6, 38.9, 60.3, 40.0, 49.4, 40.0, 37.1, 31.5, 38.7, 24…
## $ `1845` <dbl> 30.7, 38.9, 60.4, 40.0, 49.1, 40.0, 36.7, 31.7, 38.7, 24…
## $ `1846` <dbl> 30.9, 38.9, 60.5, 40.0, 48.7, 40.0, 36.5, 31.9, 38.8, 24…
## $ `1847` <dbl> 31.2, 38.9, 60.4, 40.0, 48.4, 40.0, 36.3, 32.3, 38.8, 24…
## $ `1848` <dbl> 31.5, 38.9, 60.3, 40.0, 48.1, 40.0, 36.2, 32.7, 38.9, 23…
## $ `1849` <dbl> 31.9, 38.9, 60.1, 40.0, 47.7, 40.0, 36.3, 33.3, 39.0, 23…
## $ `1850` <dbl> 32.4, 38.9, 59.8, 40.0, 47.4, 40.0, 36.4, 34.0, 39.1, 23…
## $ `1851` <dbl> 33.0, 38.9, 59.4, 40.0, 47.1, 40.0, 36.7, 34.8, 39.3, 23…
## $ `1852` <dbl> 33.6, 38.9, 59.0, 40.0, 46.7, 40.0, 37.0, 35.8, 39.5, 23…
## $ `1853` <dbl> 34.3, 38.9, 58.5, 40.0, 46.4, 40.0, 37.5, 36.8, 39.6, 22…
## $ `1854` <dbl> 35.0, 38.9, 57.9, 40.0, 46.1, 40.0, 38.0, 38.0, 39.8, 22…
## $ `1855` <dbl> 35.8, 38.9, 57.2, 40.0, 45.7, 40.0, 38.6, 39.3, 40.1, 22…
## $ `1856` <dbl> 36.7, 38.9, 56.5, 40.0, 45.4, 40.0, 39.4, 40.7, 40.3, 22…
## $ `1857` <dbl> 37.7, 38.9, 55.7, 40.0, 45.1, 40.0, 40.2, 42.3, 40.6, 22…
## $ `1858` <dbl> 38.6, 38.9, 54.8, 40.0, 44.7, 40.0, 41.1, 43.9, 40.9, 22…
## $ `1859` <dbl> 39.6, 38.9, 53.9, 40.0, 44.4, 40.0, 42.1, 45.5, 41.2, 22…
## $ `1860` <dbl> 40.5, 38.9, 53.0, 40.0, 44.1, 40.0, 43.0, 47.2, 41.5, 22…
## $ `1861` <dbl> 41.5, 38.9, 52.1, 40.0, 43.7, 40.0, 44.0, 48.8, 41.8, 22…
## $ `1862` <dbl> 42.4, 38.9, 51.2, 40.0, 43.4, 40.0, 44.9, 50.5, 42.1, 22…
## $ `1863` <dbl> 43.4, 38.9, 50.3, 40.0, 43.1, 40.0, 45.8, 52.1, 42.4, 22…
## $ `1864` <dbl> 44.3, 38.9, 49.4, 40.0, 42.7, 40.0, 46.8, 53.8, 42.6, 22…
## $ `1865` <dbl> 45.0, 38.9, 48.5, 40.0, 42.4, 40.0, 47.6, 55.2, 42.9, 22…
## $ `1866` <dbl> 45.7, 38.8, 47.7, 40.0, 42.1, 40.0, 48.4, 56.4, 43.1, 22…
## $ `1867` <dbl> 46.2, 38.7, 46.9, 40.0, 41.7, 40.0, 49.0, 57.4, 43.2, 22…
## $ `1868` <dbl> 46.7, 38.5, 46.1, 40.0, 41.4, 40.0, 49.6, 58.2, 43.3, 22…
## $ `1869` <dbl> 47.0, 38.3, 45.3, 40.0, 41.1, 40.0, 50.0, 58.7, 43.4, 22…
## $ `1870` <dbl> 47.2, 38.0, 44.5, 40.0, 40.7, 40.0, 50.4, 59.0, 43.4, 22…
## $ `1871` <dbl> 47.3, 37.7, 43.8, 40.0, 40.4, 40.0, 50.7, 59.1, 43.3, 22…
## $ `1872` <dbl> 47.2, 37.4, 43.1, 40.0, 40.1, 40.0, 50.9, 59.0, 43.2, 22…
## $ `1873` <dbl> 47.1, 37.0, 42.4, 40.0, 39.7, 40.0, 51.0, 58.7, 43.1, 22…
## $ `1874` <dbl> 46.8, 36.6, 41.8, 40.0, 39.4, 40.0, 50.9, 58.1, 42.9, 22…
## $ `1875` <dbl> 46.4, 36.1, 41.1, 40.0, 39.1, 40.0, 50.8, 57.3, 42.7, 22…
## $ `1876` <dbl> 45.9, 35.6, 40.5, 40.0, 38.7, 40.0, 50.7, 56.3, 42.4, 22…
## $ `1877` <dbl> 45.3, 35.0, 39.9, 40.0, 38.4, 40.0, 50.4, 55.1, 42.0, 22…
## $ `1878` <dbl> 44.7, 34.4, 39.4, 40.0, 38.1, 40.0, 50.0, 53.6, 41.7, 22…
## $ `1879` <dbl> 44.0, 33.8, 38.8, 40.0, 37.7, 40.0, 49.6, 52.2, 41.3, 22…
## $ `1880` <dbl> 43.4, 33.2, 38.2, 40.0, 37.4, 40.0, 49.2, 50.7, 40.9, 22…
## $ `1881` <dbl> 42.7, 32.6, 37.7, 40.0, 37.1, 40.0, 48.8, 49.3, 40.5, 22…
## $ `1882` <dbl> 42.1, 31.9, 37.1, 40.0, 36.7, 40.0, 48.4, 47.8, 40.1, 22…
## $ `1883` <dbl> 41.4, 31.3, 36.6, 40.0, 36.4, 40.0, 48.1, 46.4, 39.7, 22…
## $ `1884` <dbl> 40.8, 30.7, 36.0, 40.0, 36.1, 40.0, 47.7, 44.9, 39.4, 22…
## $ `1885` <dbl> 40.2, 30.2, 35.5, 40.0, 35.8, 40.0, 47.3, 43.6, 39.0, 22…
## $ `1886` <dbl> 39.7, 29.6, 35.0, 40.0, 35.6, 40.0, 47.0, 42.4, 38.7, 22…
## $ `1887` <dbl> 39.2, 29.2, 34.6, 40.0, 35.5, 40.0, 46.8, 41.3, 38.4, 22…
## $ `1888` <dbl> 38.8, 28.7, 34.3, 40.0, 35.5, 40.0, 46.6, 40.3, 38.1, 22…
## $ `1889` <dbl> 38.5, 28.3, 34.0, 40.0, 35.5, 40.0, 46.5, 39.5, 37.9, 23…
## $ `1890` <dbl> 38.3, 28.0, 33.7, 40.0, 35.6, 40.0, 46.4, 38.7, 37.7, 23…
## $ `1891` <dbl> 38.1, 27.6, 33.5, 40.0, 35.8, 40.0, 46.4, 38.1, 37.6, 23…
## $ `1892` <dbl> 38.0, 27.4, 33.3, 40.0, 36.0, 40.0, 46.4, 37.6, 37.4, 23…
## $ `1893` <dbl> 38.0, 27.1, 33.2, 40.0, 36.4, 40.0, 46.4, 37.2, 37.3, 23…
## $ `1894` <dbl> 38.0, 26.9, 33.2, 40.0, 36.8, 40.0, 46.5, 37.0, 37.3, 23…
## $ `1895` <dbl> 38.1, 26.8, 33.2, 40.0, 37.2, 40.0, 46.7, 36.8, 37.3, 23…
## $ `1896` <dbl> 38.2, 26.7, 33.3, 40.0, 37.8, 40.0, 46.9, 36.8, 37.3, 23…
## $ `1897` <dbl> 38.5, 26.6, 33.4, 40.0, 38.4, 40.0, 47.1, 36.9, 37.3, 23…
## $ `1898` <dbl> 38.8, 26.6, 33.5, 40.0, 39.1, 40.0, 47.5, 37.1, 37.4, 23…
## $ `1899` <dbl> 39.1, 26.5, 33.7, 40.0, 39.8, 40.0, 47.8, 37.4, 37.4, 23…
## $ `1900` <dbl> 39.4, 26.5, 33.8, 40.0, 40.5, 40.0, 48.1, 37.6, 37.5, 23…
## $ `1901` <dbl> 39.7, 26.5, 34.0, 40.0, 41.2, 40.0, 48.4, 37.8, 37.5, 23…
## $ `1902` <dbl> 39.9, 26.4, 34.2, 40.0, 41.9, 40.0, 48.7, 38.0, 37.6, 23…
## $ `1903` <dbl> 40.2, 26.4, 34.3, 40.0, 42.6, 40.0, 49.0, 38.3, 37.7, 23…
## $ `1904` <dbl> 40.5, 26.4, 34.5, 40.0, 43.3, 40.0, 49.3, 38.5, 37.7, 23…
## $ `1905` <dbl> 40.8, 26.3, 34.6, 40.0, 44.0, 40.0, 49.5, 38.7, 37.8, 23…
## $ `1906` <dbl> 41.0, 26.3, 34.8, 40.0, 44.7, 40.0, 49.8, 38.9, 37.8, 23…
## $ `1907` <dbl> 41.2, 26.3, 35.0, 40.0, 45.4, 40.0, 49.9, 39.1, 37.8, 23…
## $ `1908` <dbl> 41.3, 26.3, 35.2, 40.0, 46.1, 40.0, 50.1, 39.2, 37.8, 23…
## $ `1909` <dbl> 41.5, 26.3, 35.3, 40.0, 46.8, 40.0, 50.2, 39.4, 37.8, 23…
## $ `1910` <dbl> 41.5, 26.2, 35.5, 40.0, 47.5, 40.0, 50.2, 39.5, 37.7, 23…
## $ `1911` <dbl> 41.5, 26.2, 35.7, 40.0, 48.3, 40.0, 50.2, 39.6, 37.6, 23…
## $ `1912` <dbl> 41.5, 26.2, 35.9, 40.0, 49.0, 40.0, 50.2, 39.7, 37.5, 23…
## $ `1913` <dbl> 41.5, 26.2, 36.0, 40.0, 49.7, 40.0, 50.2, 39.8, 37.4, 23…
## $ `1914` <dbl> 41.4, 26.2, 36.2, 40.0, 50.4, 40.0, 50.0, 39.9, 37.3, 23…
## $ `1915` <dbl> 41.3, 26.2, 36.4, 40.0, 51.2, 40.0, 49.9, 40.0, 37.1, 23…
## $ `1916` <dbl> 41.1, 26.2, 36.6, 40.0, 51.9, 40.0, 49.7, 40.0, 36.7, 23…
## $ `1917` <dbl> 40.9, 26.2, 36.8, 40.0, 52.7, 40.0, 49.5, 40.1, 36.6, 23…
## $ `1918` <dbl> 40.6, 26.2, 37.0, 40.0, 53.4, 40.0, 49.2, 40.1, 36.4, 23…
## $ `1919` <dbl> 40.4, 26.2, 37.2, 40.0, 54.2, 40.0, 48.9, 40.1, 36.2, 23…
## $ `1920` <dbl> 40.2, 26.3, 37.4, 40.0, 54.9, 40.0, 48.6, 40.1, 35.9, 23…
## $ `1921` <dbl> 39.9, 26.3, 37.6, 40.0, 55.7, 40.0, 48.3, 40.1, 35.8, 23…
## $ `1922` <dbl> 39.7, 26.3, 37.8, 40.0, 56.4, 40.0, 48.1, 40.2, 35.8, 23…
## $ `1923` <dbl> 39.4, 26.3, 38.0, 40.0, 57.2, 40.0, 47.8, 40.2, 35.5, 23…
## $ `1924` <dbl> 39.2, 26.3, 38.2, 40.0, 57.9, 40.0, 47.5, 40.2, 35.0, 23…
## $ `1925` <dbl> 39.0, 26.3, 38.3, 40.0, 58.5, 40.0, 47.2, 40.2, 34.4, 23…
## $ `1926` <dbl> 38.7, 26.3, 38.5, 40.0, 59.0, 40.0, 47.0, 40.3, 33.8, 23…
## $ `1927` <dbl> 38.5, 26.3, 38.7, 40.0, 59.5, 40.0, 46.6, 40.3, 33.5, 23…
## $ `1928` <dbl> 38.3, 26.3, 38.8, 40.0, 59.9, 40.0, 46.3, 40.3, 33.2, 23…
## $ `1929` <dbl> 38.0, 26.3, 38.9, 40.0, 60.2, 40.0, 46.1, 40.3, 33.0, 23…
## $ `1930` <dbl> 37.8, 26.4, 39.0, 40.0, 60.4, 40.0, 46.0, 40.3, 33.1, 23…
## $ `1931` <dbl> 37.6, 26.4, 39.1, 40.0, 60.6, 40.0, 46.0, 40.4, 32.7, 23…
## $ `1932` <dbl> 37.4, 26.4, 39.2, 40.0, 60.7, 40.0, 46.1, 40.4, 32.4, 23…
## $ `1933` <dbl> 37.1, 26.4, 39.3, 40.0, 60.7, 40.0, 46.1, 40.4, 32.3, 23…
## $ `1934` <dbl> 36.9, 26.4, 39.3, 40.0, 60.7, 40.0, 46.2, 40.4, 32.1, 23…
## $ `1935` <dbl> 36.7, 26.4, 39.4, 40.0, 60.7, 40.0, 46.0, 40.4, 31.3, 23…
## $ `1936` <dbl> 36.5, 26.4, 39.4, 40.0, 60.4, 40.0, 46.2, 40.5, 31.5, 23…
## $ `1937` <dbl> 36.3, 26.4, 39.4, 40.0, 60.1, 40.0, 46.7, 40.5, 31.9, 23…
## $ `1938` <dbl> 36.1, 26.4, 39.4, 40.0, 59.9, 40.0, 47.3, 40.5, 32.2, 23…
## $ `1939` <dbl> 35.9, 26.4, 39.4, 40.0, 59.6, 40.0, 48.5, 40.5, 32.3, 23…
## $ `1940` <dbl> 35.7, 26.5, 39.4, 40.0, 59.4, 40.0, 49.5, 40.6, 31.9, 23…
## $ `1941` <dbl> 35.5, 26.5, 39.5, 40.0, 59.1, 40.0, 50.1, 40.6, 31.2, 23…
## $ `1942` <dbl> 35.2, 26.5, 39.5, 40.0, 58.9, 40.0, 50.4, 40.6, 30.7, 23…
## $ `1943` <dbl> 35.0, 26.5, 39.5, 40.0, 58.6, 40.0, 50.9, 40.6, 30.9, 23…
## $ `1944` <dbl> 34.8, 26.5, 39.5, 40.0, 58.4, 40.0, 51.3, 40.6, 31.1, 23…
## $ `1945` <dbl> 34.6, 26.5, 39.5, 40.0, 58.1, 40.0, 51.1, 40.7, 31.2, 23…
## $ `1946` <dbl> 34.4, 26.5, 39.5, 40.0, 57.9, 40.0, 51.0, 40.7, 32.4, 23…
## $ `1947` <dbl> 34.3, 26.5, 39.5, 40.0, 57.6, 40.0, 50.3, 40.7, 32.7, 23…
## $ `1948` <dbl> 34.2, 26.5, 39.6, 40.0, 57.4, 40.0, 49.2, 40.7, 33.1, 23…
## $ `1949` <dbl> 34.1, 26.5, 39.6, 40.0, 57.2, 40.0, 47.7, 40.7, 33.4, 23…
## $ `1950` <dbl> 34.0, 26.6, 39.6, 40.0, 57.0, 40.0, 46.4, 40.8, 35.1, 24…
## $ `1951` <dbl> 34.0, 26.6, 39.6, 40.0, 56.8, 40.0, 45.7, 40.8, 35.8, 24…
## $ `1952` <dbl> 34.0, 26.6, 39.6, 40.0, 56.6, 40.0, 44.7, 40.8, 36.2, 24…
## $ `1953` <dbl> 34.1, 26.6, 39.6, 40.0, 56.5, 40.0, 43.6, 40.8, 36.5, 25…
## $ `1954` <dbl> 34.3, 26.6, 39.7, 40.0, 56.4, 40.0, 42.9, 40.9, 36.5, 25…
## $ `1955` <dbl> 34.4, 26.6, 39.7, 40.0, 56.3, 40.0, 41.9, 40.9, 35.4, 26…
## $ `1956` <dbl> 34.5, 26.6, 39.7, 40.0, 56.2, 40.0, 42.0, 40.9, 35.2, 27…
## $ `1957` <dbl> 34.6, 26.6, 39.7, 40.0, 56.1, 40.0, 42.2, 40.9, 35.2, 27…
## $ `1958` <dbl> 34.6, 26.6, 39.7, 40.0, 56.0, 40.0, 42.4, 40.9, 35.2, 28…
## $ `1959` <dbl> 34.5, 26.6, 39.7, 40.0, 55.9, 40.0, 41.9, 41.0, 35.3, 28…
## $ `1960` <dbl> 34.4, 26.7, 39.7, 40.0, 55.8, 40.0, 41.4, 41.0, 35.2, 29…
## $ `1961` <dbl> 34.1, 26.7, 39.8, 40.0, 55.7, 40.0, 41.4, 41.0, 35.3, 29…
## $ `1962` <dbl> 33.7, 26.7, 39.8, 40.0, 55.6, 40.0, 41.5, 41.0, 35.4, 29…
## $ `1963` <dbl> 33.2, 26.7, 39.8, 40.0, 55.5, 40.0, 40.7, 41.1, 35.2, 29…
## $ `1964` <dbl> 32.7, 26.7, 39.8, 40.0, 55.4, 40.0, 40.0, 41.1, 35.1, 29…
## $ `1965` <dbl> 32.2, 26.7, 39.8, 40.0, 55.3, 40.0, 39.4, 41.1, 35.1, 29…
## $ `1966` <dbl> 31.7, 26.7, 39.8, 40.0, 55.2, 40.0, 38.8, 41.1, 34.8, 29…
## $ `1967` <dbl> 31.2, 26.7, 39.9, 40.0, 55.2, 40.0, 38.2, 41.1, 34.7, 29…
## $ `1968` <dbl> 30.8, 26.7, 39.9, 40.0, 55.1, 40.0, 37.6, 41.2, 34.5, 29…
## $ `1969` <dbl> 30.5, 26.7, 39.9, 40.0, 54.9, 40.0, 36.7, 41.2, 34.2, 29…
## $ `1970` <dbl> 30.5, 26.8, 39.9, 40.0, 54.8, 40.0, 36.1, 41.2, 33.8, 29…
## $ `1971` <dbl> 30.8, 26.8, 39.9, 40.0, 54.7, 40.0, 35.6, 41.2, 33.6, 29…
## $ `1972` <dbl> 31.2, 26.8, 39.9, 40.0, 54.6, 40.0, 35.3, 41.3, 33.4, 28…
## $ `1973` <dbl> 31.6, 26.8, 40.0, 40.0, 54.5, 40.0, 35.2, 41.3, 33.3, 28…
## $ `1974` <dbl> 31.9, 26.8, 40.0, 40.0, 54.4, 40.0, 35.5, 41.3, 33.2, 27…
## $ `1975` <dbl> 32.1, 26.8, 40.0, 40.0, 54.4, 40.0, 36.1, 41.3, 33.1, 27…
## $ `1976` <dbl> 32.2, 26.8, 40.0, 40.0, 54.3, 40.0, 36.9, 41.3, 33.1, 26…
## $ `1977` <dbl> 32.2, 26.8, 40.0, 40.0, 54.2, 40.0, 37.9, 41.4, 33.0, 26…
## $ `1978` <dbl> 32.2, 26.8, 40.0, 40.0, 54.1, 40.0, 38.8, 41.4, 32.8, 25…
## $ `1979` <dbl> 32.2, 26.8, 40.1, 40.0, 54.0, 40.0, 39.7, 41.4, 32.5, 25…
## $ `1980` <dbl> 32.2, 26.9, 40.1, 40.0, 53.9, 40.0, 40.4, 41.4, 32.2, 24…
## $ `1981` <dbl> 32.1, 26.9, 40.1, 40.0, 53.8, 40.0, 41.0, 41.5, 32.0, 24…
## $ `1982` <dbl> 32.0, 26.9, 40.1, 40.0, 53.7, 40.0, 41.5, 41.5, 31.9, 24…
## $ `1983` <dbl> 32.1, 26.9, 40.1, 40.0, 53.6, 40.0, 41.8, 41.5, 31.9, 24…
## $ `1984` <dbl> 32.5, 26.9, 40.1, 40.0, 53.5, 40.0, 42.1, 41.5, 32.2, 25…
## $ `1985` <dbl> 33.0, 26.9, 40.1, 40.0, 53.4, 40.0, 42.9, 41.5, 32.4, 26…
## $ `1986` <dbl> 33.7, 26.9, 40.2, 40.0, 53.3, 40.0, 43.7, 41.6, 32.6, 26…
## $ `1987` <dbl> 34.7, 26.9, 40.0, 40.0, 53.2, 40.0, 44.5, 41.6, 32.8, 27…
## $ `1988` <dbl> 35.4, 26.9, 39.8, 40.0, 53.1, 40.0, 45.3, 41.6, 32.9, 28…
## $ `1989` <dbl> 36.0, 26.9, 39.4, 40.0, 53.0, 40.0, 46.1, 41.7, 33.0, 28…
## $ `1990` <dbl> 36.4, 27.0, 38.8, 40.0, 52.9, 40.0, 46.1, 41.9, 33.0, 28…
## $ `1991` <dbl> 36.7, 27.0, 38.1, 40.0, 52.8, 40.0, 45.9, 42.2, 33.0, 28…
## $ `1992` <dbl> 36.7, 27.0, 37.4, 40.0, 52.8, 40.0, 45.9, 42.6, 32.9, 27…
## $ `1993` <dbl> 36.8, 27.0, 36.7, 40.0, 52.7, 40.0, 46.4, 43.0, 32.8, 27…
## $ `1994` <dbl> 36.8, 27.0, 36.1, 40.0, 52.6, 40.0, 46.9, 43.5, 32.7, 27…
## $ `1995` <dbl> 36.8, 27.2, 35.5, 40.0, 52.5, 40.0, 47.7, 43.3, 32.7, 27…
## $ `1996` <dbl> 36.8, 27.5, 34.9, 40.0, 52.4, 40.0, 48.8, 42.5, 32.8, 27…
## $ `1997` <dbl> 36.8, 28.0, 34.4, 40.0, 52.3, 40.0, 49.6, 41.0, 32.9, 27…
## $ `1998` <dbl> 36.8, 28.6, 34.0, 40.0, 52.2, 40.0, 50.0, 39.4, 33.0, 27…
## $ `1999` <dbl> 36.8, 29.4, 33.5, 40.0, 52.1, 40.0, 50.8, 37.6, 33.2, 28…
## $ `2000` <dbl> 36.8, 30.2, 33.1, 40.0, 51.8, 40.0, 51.7, 36.2, 33.3, 28…
## $ `2001` <dbl> 36.8, 30.7, 32.6, 40.0, 51.3, 40.0, 51.7, 35.0, 33.4, 29…
## $ `2002` <dbl> 36.8, 31.0, 32.2, 40.0, 50.6, 40.0, 51.4, 35.3, 33.5, 29…
## $ `2003` <dbl> 36.8, 31.1, 31.7, 40.0, 49.7, 40.0, 50.8, 35.3, 33.7, 29…
## $ `2004` <dbl> 36.8, 31.0, 31.2, 40.0, 48.5, 40.0, 49.4, 34.2, 34.0, 29…
## $ `2005` <dbl> 36.8, 30.7, 30.8, 40.0, 47.3, 40.0, 47.9, 33.5, 34.3, 29…
## $ `2006` <dbl> 36.8, 30.4, 30.3, 40.0, 46.2, 40.0, 46.7, 32.7, 34.7, 29…
## $ `2007` <dbl> 36.8, 30.2, 29.9, 40.0, 45.0, 40.0, 45.8, 30.8, 34.9, 30…
## $ `2008` <dbl> 36.8, 30.0, 29.4, 40.0, 44.1, 40.0, 44.9, 29.6, 35.0, 30…
## $ `2009` <dbl> 36.8, 29.7, 29.0, 40.0, 43.4, 40.0, 44.0, 29.6, 34.2, 30…
## $ `2010` <dbl> 36.8, 29.5, 28.5, 40.0, 42.9, 40.0, 43.0, 29.2, 33.6, 30…
## $ `2011` <dbl> 36.8, 29.3, 28.2, 40.0, 42.7, 40.0, 42.3, 29.5, 32.9, 30…
## $ `2012` <dbl> 36.8, 29.1, 27.9, 40.0, 42.6, 40.0, 41.8, 30.2, 32.4, 30…
## $ `2013` <dbl> 36.8, 29.0, 27.7, 40.0, 42.6, 40.0, 41.6, 30.7, 31.9, 30…
## $ `2014` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 41.6, 31.3, 32.2, 30…
## $ `2015` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 41.8, 31.9, 32.3, 30…
## $ `2016` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.1, 32.3, 32.3, 30…
## $ `2017` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.3, 32.5, 32.3, 30…
## $ `2018` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2019` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2020` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2021` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2022` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2023` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2024` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2025` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2026` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2027` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2028` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2029` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2030` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2031` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2032` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2033` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2034` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2035` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2036` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2037` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2038` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2039` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2040` <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
# Reshape the inequality table to long form and attach the happiness
# measures.
# Note: despite its name, the result is already joined with
# happiness_clean, and the inner join keeps only country/year pairs present
# in both tables.
income_inequality_clean <- income_inequality %>%
  gather(key = "year", value = "inequality", "1880":"2020") %>%
  # Year columns outside 1880-2020 were not gathered; drop them here.
  select(country, year, inequality) %>%
  # gather() yields the year as text. Convert it BEFORE filtering so that
  # `year > 2005` is a numeric comparison rather than a string comparison.
  mutate(year = as.double(year)) %>%
  filter(year > 2005) %>%
  inner_join(
    happiness_clean,
    by = c("country" = "country_name", "year" = "year")
  )
#glimpse(income_inequality_clean)
# Full analysis table ----
# Both PPP tables are matched on country name and year.
country_year <- c("country" = "country_name", "year" = "year")

# PPP_clean and PPPP_clean each carry a country_code column, so the second
# join suffixes them .x / .y. The .x copy is dropped and the .y copy is the
# key used to attach region and income group.
full_data <- income_inequality_clean %>%
  inner_join(PPP_clean, by = country_year) %>%
  inner_join(PPPP_clean, by = country_year) %>%
  select(-country_code.x) %>%
  inner_join(
    counrty_region_clean,
    by = c("country_code.y" = "country_code")
  )

glimpse(full_data)
## Observations: 1,403
## Variables: 19
## $ country <chr> "Armenia", "Austria", "Azerbai…
## $ year <dbl> 2006, 2006, 2006, 2006, 2006, …
## $ inequality <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, …
## $ life_ladder <dbl> 4.289311, 7.122211, 4.727871, …
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043,…
## $ social_support <dbl> 0.6818768, 0.9363504, 0.854414…
## $ healthy_life_expectancy_at_birth <dbl> 64.80, 70.76, 61.88, 59.02, 61…
## $ freedom_to_make_life_choices <dbl> 0.5201978, 0.9413823, 0.771528…
## $ generosity <dbl> -0.216674119, 0.300667107, -0.…
## $ perceptions_of_corruption <dbl> 0.8495131, 0.4901112, 0.774117…
## $ positive_affect <dbl> 0.4941210, 0.8231047, 0.511687…
## $ negative_affect <dbl> 0.4694188, 0.1738117, 0.275695…
## $ confidence_in_national_government <dbl> 0.3443375, 0.4970378, 0.754706…
## $ democratic_quality <dbl> -0.50248164, 1.22430921, -1.18…
## $ ppp <dbl> 1.600727e+02, 8.609180e-01, 2.…
## $ country_code.y <chr> "ARM", "AUT", "AZE", "BGD", "B…
## $ pppp <dbl> 149.7049751, 0.8785690, 0.2253…
## $ region <chr> "Europe & Central Asia", "Euro…
## $ income_group <chr> "Upper middle income", "High i…
# Additional data sets ----
# All files are read from data_raw/ like the inputs above.

# Suicide rates and income per person.
suicide <- read_csv(file = here::here("data_raw", "newdata3.csv"))
income_per_person <- read_csv(
  file = here::here("data_raw", "income_per_person.csv")
)

# World Happiness tables, one CSV per year from 2015 to 2019.
WH_2015 <- read_csv(file = here::here("data_raw", "WH_2015.csv"))
WH_2016 <- read_csv(file = here::here("data_raw", "WH_2016.csv"))
WH_2017 <- read_csv(file = here::here("data_raw", "WH_2017.csv"))
WH_2018 <- read_csv(file = here::here("data_raw", "WH_2018.csv"))
WH_2019 <- read_csv(file = here::here("data_raw", "WH_2019.csv"))
#glimpse(WH_2019)
# Clean the 2019 World Happiness table ----
# The rank column is dropped, the remaining columns are renamed to the
# names used in full_data, and every row is stamped with year 2019.
# NOTE(review): gdp_per_capita is relabelled log_gdp_per_capita without any
# transformation -- confirm the two columns are on the same scale.
WH2019 <- WH_2019 %>%
  janitor::clean_names() %>%
  select(-overall_rank) %>%
  rename(
    country = country_or_region,
    life_ladder = score,
    log_gdp_per_capita = gdp_per_capita,
    healthy_life_expectancy_at_birth = healthy_life_expectancy
  ) %>%
  mutate(year = 2019)
#glimpse(WH2019)
# Combine the 2019 table with the joined panel ----
# No `by` is given, so full_join() matches on every column name the two
# tables have in common; rows found in only one table are kept, with NA in
# the columns the other table would have supplied.
adjusted_full <- full_join(full_data, WH2019)
#glimpse(adjusted_full)
# Income per person ----
# Reshape to one row per country and year, then keep 2015 through 2019.
income <- income_per_person %>%
  gather(key = "year", value = "income_per_person", -country) %>%
  mutate(year = as.double(year)) %>%
  filter(year %in% 2015:2019)
#glimpse(income)

# Attach income to the adjusted data by country and year. A full join keeps
# rows that exist in only one of the two tables.
WH_Income <- full_join(adjusted_full, income, by = c("country", "year"))
#glimpse(WH_Income)
# Suicide rates ----
# Keep the combined "Both sexes" rows, then reshape to one row per country
# and year holding the suicide rate.
suicide_rates <- suicide %>%
  filter(sex == "Both sexes") %>%
  select(-sex) %>%
  gather(key = "year", value = "suicide_rate", -country) %>%
  mutate(year = as.double(year))
#glimpse(suicide_rates)

# Final data set ----
# Full join with the suicide rates by country and year, then shorten the
# longest column names for use in tables and plots.
final_data <- WH_Income %>%
  full_join(suicide_rates, by = c("country", "year")) %>%
  rename(
    lifeExpAtBirth = healthy_life_expectancy_at_birth,
    freedom = freedom_to_make_life_choices,
    perceptionCorrupt = perceptions_of_corruption,
    trustInGov = confidence_in_national_government,
    demoQuality = democratic_quality,
    incomePperson = income_per_person,
    suicideR = suicide_rate
  )
# Lookup table for the world map: one happiness score per map region, 2019.
# The only relabelling applied is "United States" -> "USA", done directly
# on the values. This avoids widening the table to one column per country
# and gathering it back, which depended on Afghanistan and Zimbabwe being
# the first and last countries and failed on duplicate or missing names.
# NOTE(review): other country names may also differ from the map's region
# names (e.g. "United Kingdom" vs "UK") and would then be drawn in gray --
# confirm and extend the recode if so.
# NOTE(review): this object shares its name with the map_data() function
# called when the map is built; R still finds the function when it is
# called, but a distinct name would be clearer.
map_data <- final_data %>%
  filter(year == 2019) %>%
  select(region = country, life_ladder) %>%
  mutate(region = recode(region, "United States" = "USA"))

glimpse(final_data)
## Observations: 2,421
## Variables: 21
## $ country <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
The World Happiness Report is a landmark survey of the state of global happiness. The first report was published in 2012, and the report continues to gain global recognition as governments and organizations increasingly use happiness indicators to improve the state of their countries. Measurements of well-being can be used effectively to assess the progress of nations.
In this report we look into the pursuit of happiness. This ‘pursuit’ has been a vital part of life as we know it. But what does happiness really look like across the globe? The pursuit of happiness may look different from person to person, but are there similarities amongst nations? Can regions increase their happiness score by utilizing specific factors of life?
What economic and social factors have the most significant impact on the overall level of happiness across the globe?
In our research we utilize four specific sources in order to reflect the most up-to-date and trusted variables relating to happiness.
This data provided original statistics on over 150 countries around the world. The variables included are appended at the end of this report. The happiness scores were developed from the Gallup World Poll. People answered a main life evaluation question that was asked as a Cantril Ladder. Each person was asked on a scale of 0 (worst life) to 10 (best life), how they would rate their living situation right now. Gallup then used weights to make the estimates representative of the country as a whole.
Information in this set was retrieved from the Kaggle dataset, which was taken directly from the World Health Organization (W.H.O.). This information was gathered by the Global Health Observatory data repository. As more recent and revised data is released, the data is updated.
The data source provides information such as country name, year, Gross Domestic Product (GDP), and Income per person for that country. Gapminder is an independent foundation that focuses on the collection of world data. The data is collected in collaboration with universities, The UN, public agencies, and government agencies around the world.
This allowed us to find global statistics on the Purchasing Power Parity (PPP). The data comes straight from the World Bank. The data set provides statistics on the PPP for the years 1990 - 2018. These world development indicators are compiled from officially recognized international sources that represent the “most current and accurate global development data available”. It includes national, regional, and global estimates; therefore this data is valid and can be trusted.
# World map data ----
# map_data("world") supplies the country outlines; the `map_data` passed to
# left_join() is the per-country happiness lookup built earlier (it shares
# its name with the function). Antarctica is left out of the map, and
# outline rows with no matching score keep life_ladder = NA.
world <- map_data("world") %>%
  filter(region != "Antarctica") %>%
  left_join(map_data, by = "region")

glimpse(world)
## Observations: 94,680
## Variables: 7
## $ long <dbl> -69.89912, -69.89571, -69.94219, -70.00415, -70.0661…
## $ lat <dbl> 12.45200, 12.42300, 12.43853, 12.50049, 12.54697, 12…
## $ group <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2…
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 1…
## $ region <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba"…
## $ subregion <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ life_ladder <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.203, 3.203…
# Choropleth of the 2019 happiness score: one filled polygon per outline
# group, coloured on the viridis "plasma" scale.
ggplot(
  world,
  aes(x = long, y = lat, group = group, fill = life_ladder)
) +
  geom_polygon() +
  scale_fill_viridis(option = "plasma") +
  labs(
    title = "2019 World Happiness",
    fill = "happiness",
    caption = "source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_void() +
  # Centre the title; right-align the caption in bold italics.
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = "bold.italic", hjust = 1)
  )
Immediately we see that the world is heavily divided in terms of each country's happiness level. The livelier colors shown in North America, Australia, and parts of Northern Europe indicate a much higher level of happiness than in Central Africa. Countries colored in gray were not included in the data.
Take a look at the happiness level now divided by region.
# Regional happiness ----
# Mean life_ladder score per region, computed from the 2018 rows that have
# a region. The region factor is then ordered by that mean so that bars
# plotted from this table come out sorted.
# NOTE(review): this uses 2018 while the map above uses 2019 -- confirm
# this is intended.
summaryRegionHappiness <- final_data %>%
  filter(year == 2018, !is.na(region)) %>%
  group_by(region) %>%
  summarise(meanHappiness = mean(life_ladder)) %>%
  mutate(region = fct_reorder(region, meanHappiness))

summaryRegionHappiness
## # A tibble: 7 x 2
## region meanHappiness
## <fct> <dbl>
## 1 East Asia & Pacific 5.75
## 2 Europe & Central Asia 6.27
## 3 Latin America & Caribbean 6.02
## 4 Middle East & North Africa 5.70
## 5 North America 7.03
## 6 South Asia 4.55
## 7 Sub-Saharan Africa 4.51
# Horizontal bar chart of the mean happiness score per region.
#
# Two fixes relative to the earlier version:
#  * Axis titles: labs(x = , y = ) name the x and y *aesthetics*, and
#    coord_flip() only changes where they are drawn. x is the region, so it
#    must be titled "Region" and y "Mean happiness score" (they were
#    swapped).
#  * Value labels are generated from the data with geom_text() instead of
#    seven hand-typed annotate() calls, so they cannot drift out of sync
#    with summaryRegionHappiness.
ggplot(summaryRegionHappiness, aes(x = region, y = meanHappiness)) +
  geom_col(aes(fill = meanHappiness)) +
  # Print each mean (two decimals) a little past the end of its bar.
  geom_text(
    aes(y = meanHappiness + 0.55, label = sprintf("%.2f", meanHappiness))
  ) +
  coord_flip() +
  # Bars start at the axis; extra room is left past the longest bar.
  scale_y_continuous(expand = expand_scale(mult = c(0, .5))) +
  labs(
    x = "Region",
    y = "Mean happiness score",
    title = "Mean Happiness Score per Region",
    fill = "Mean Happiness",
    caption = "source: https://worldhappiness.report/ed/2019/#read"
  ) +
  scale_fill_viridis(option = "plasma") +
  theme_half_open() +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = "bold.italic", hjust = 0)
  )
This chart will be the base visualization for the remainder of the report. We have compared specific social and economic factors in order to see what motivates happiness in some regions compared to others. You can follow each region by its mean happiness score, denoted by the color of the bar, with yellow being the most happy and dark blue the least. This will help you better understand the influence and weight of the following factors.
# Overall correlation ----
# 2018 values of the happiness score and the candidate explanatory factors.
summaryFactors <- final_data %>%
  filter(year == 2018) %>%
  select(
    inequality,
    life_ladder,
    incomePperson,
    ppp,
    trustInGov,
    generosity,
    freedom,
    lifeExpAtBirth,
    social_support,
    perceptionCorrupt
  )
#glimpse(summaryFactors)

# Correlation matrix plot with the coefficients printed in white, rounded
# to two decimals.
# NOTE(review): `colors` does not appear to be a named ggcorr() argument
# (the palette argument is `palette`) -- confirm it has the intended effect.
ggcorr(
  summaryFactors,
  label = TRUE,
  label_round = 2,
  label_color = "white",
  colors = "RdBu",
  hjust = .70,
  layout.exp = 4
)
Using the above correlation matrix we observe that income per person, social support, and healthy life expectancy at birth are highly correlated with happiness overall. These factors can be divided into economic factors as well as social.
Below we highlight important economic factors, and later on social factors.
Each factor is important for identifying what differentiates happiness in specific regions of the world.
Economic Factors
Region | Mean Log(GDP) | Mean PPP | Mean Income Inequality |
---|---|---|---|
East Asia & Pacific | 9.58 | 1216.69 | 36.98 |
Europe & Central Asia | 10.21 | 9.99 | 31.63 |
Latin America & Caribbean | 9.26 | 139.42 | 46.79 |
Middle East & North Africa | 9.84 | 124.50 | 36.23 |
North America | 10.81 | 1.12 | 36.30 |
South Asia | 8.75 | 32.20 | 35.20 |
Sub-Saharan Africa | 7.98 | 480.58 | 42.94 |
PPP is measured by finding the values (in USD) of consumer goods that are present in each country. If a good costs $100 in the US and $200 in the United Kingdom, then the purchasing power parity exchange rate is 1:2.
GDP is a measure used to evaluate the health of a country’s economy. It is the total value of the goods and services produced in a country during a specific period of time.
# Economic graphs and description: data preparation
# 2018 rows of full_data; every economic chart below is built from this slice.
data_2018 <- filter(full_data, year == 2018)

# Pearson correlation of each economic factor with the happiness score
# (life_ladder), computed on complete observations and rounded to 2 decimals
# for use in the chart annotations.
corr_gdp <- with(
  data_2018,
  round(cor(log_gdp_per_capita, life_ladder,
            method = "pearson", use = "complete.obs"), 2)
)
corr_ineq <- with(
  data_2018,
  round(cor(inequality, life_ladder,
            method = "pearson", use = "complete.obs"), 2)
)
corr_ppp <- with(
  data_2018,
  round(cor(ppp, life_ladder,
            method = "pearson", use = "complete.obs"), 2)
)

# Regional averages over the rows that have a GDP figure.
avg_region <- data_2018 %>%
  filter(!is.na(log_gdp_per_capita)) %>%
  group_by(region) %>%
  summarize(
    average_gdp = mean(log_gdp_per_capita),
    average_ppp = mean(ppp),
    average_ineq = mean(inequality),
    average_happ = mean(life_ladder)
  )
avg_region
## # A tibble: 7 x 5
## region average_gdp average_ppp average_ineq
## <chr> <dbl> <dbl> <dbl>
## 1 East Asia & Pacific 9.58 1217. 37.0
## 2 Europe & Central Asia 10.2 9.99 31.6
## 3 Latin America & Caribbean 9.26 139. 46.8
## 4 Middle East & North Africa 9.84 125. 36.2
## 5 North America 10.8 1.12 36.3
## 6 South Asia 8.75 32.2 35.2
## 7 Sub-Saharan Africa 7.98 481. 42.9
## average_happ
## <dbl>
## 1 5.69
## 2 6.27
## 3 6.02
## 4 5.55
## 5 7.03
## 6 4.55
## 7 4.51
# Scatterplot: log GDP per capita vs happiness (2018), with r printed in red.
ggplot(data_2018) +
  geom_point(aes(x = log_gdp_per_capita, y = life_ladder)) +
  annotate(
    geom = 'text', x = 7.5, y = 7.5,
    label = str_c('r= ', corr_gdp),
    hjust = 0, size = 7, color = 'red'
  ) +
  labs(
    title = "GDP vs. Happiness 2018",
    x = "Log(GDP)",
    y = "Happiness",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal_grid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Regions ordered by average log GDP so the bars are sorted in the chart.
avg_region_gdp <- avg_region %>%
  mutate(region = fct_reorder(region, average_gdp))

# Horizontal bars of average log GDP per region, filled by average happiness.
ggplot(avg_region_gdp) +
  geom_col(aes(x = region, y = average_gdp, fill = average_happ)) +
  scale_fill_viridis(option = "plasma") +
  labs(x = 'Region',
       y = 'Average Log GDP',
       title = 'Happiness by region and GDP',
       fill = 'happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  coord_flip() +
  theme_minimal_vgrid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
  # expansion() replaces the deprecated expand_scale(); same arguments:
  # no padding at zero, 50% head-room on the value side.
  scale_y_continuous(expand = expansion(mult = c(0, 0.5)))
The charts above take a deeper look into the gross domestic product of a country and its happiness. GDP has a high positive correlation (r = 0.78) with happiness. The bar chart shows that regions with a high GDP also have a higher happiness score. Although GDP isn’t an indicator of how much money one has, it is interesting to note that there is such a high correlation between a country’s economic health and its level of happiness.
# Scatterplot: income inequality vs happiness (2018), with r printed in red.
ggplot(data_2018, aes(x = inequality, y = life_ladder)) +
  geom_point() +
  theme_minimal_grid() +
  # Anchor the label at the smallest observed inequality value. The previous
  # x = 7.5 was carried over from the log-GDP plot and is far below the
  # inequality values in the data, which stretched the x axis to reach it.
  annotate(geom = 'text',
           x = min(data_2018$inequality, na.rm = TRUE), y = 7.5,
           label = str_c('r= ', corr_ineq),
           hjust = 0, size = 6, color = 'red') +
  labs(title = "Income Inequality vs. Happiness 2018",
       x = "Income Inequality",
       y = "Happiness",
       caption = 'Source: https://www.gapminder.org/data/') +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Regions ordered by average income inequality so the bars are sorted.
avg_region_ineq <- avg_region %>%
  mutate(region = fct_reorder(region, average_ineq))

# Horizontal bars of average inequality per region, filled by average happiness.
ggplot(avg_region_ineq) +
  geom_col(aes(x = region, y = average_ineq, fill = average_happ)) +
  scale_fill_viridis(option = "plasma") +
  labs(x = 'Region',
       y = 'Income Inequality',
       title = 'Happiness by Region and Income Inequality',
       fill = 'happiness',
       caption = 'Source: https://www.gapminder.org/data/') +
  coord_flip() +
  theme_minimal_vgrid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
  # expansion() replaces the deprecated expand_scale(); same arguments.
  scale_y_continuous(expand = expansion(mult = c(0, 0.5)))
The next charts highlight income inequality in a country and its corresponding happiness level. One would assume that lower income inequality would lead to greater happiness, but there is not much of a correlation between the two. In the bar chart, we see that South Asia has the second lowest income inequality yet is one of the unhappiest regions. These graphs suggest that lower income inequality does not necessarily have the effect on happiness that we previously expected.
# Scatterplot: purchasing power parity (GDP based) vs happiness (2018),
# with r printed in red.
# The former scale_fill_viridis() call was dropped: no fill aesthetic is
# mapped in this plot, so it had no effect.
ggplot(data_2018, aes(x = ppp, y = life_ladder)) +
  geom_point() +
  theme_minimal_grid() +
  annotate(geom = 'text', x = 1000, y = 7.5,
           label = str_c('r= ', corr_ppp),
           hjust = 0, size = 7, color = 'red') +
  labs(title = "Purchasing Power Parity Based on GDP vs. Happiness 2018",
       x = "Purchasing Power Parity",
       y = "Happiness",
       caption = 'Source: https://data.worldbank.org/indicator/') +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Regions ordered by average PPP so the bars are sorted in the chart.
avg_region_ppp <- avg_region %>%
  mutate(region = fct_reorder(region, average_ppp))

# Horizontal bars of average PPP per region, filled by average happiness.
ggplot(avg_region_ppp) +
  geom_col(aes(x = region, y = average_ppp, fill = average_happ)) +
  scale_fill_viridis(option = "plasma") +
  labs(x = 'Region',
       y = 'Purchasing Power Parity (PPP)',
       title = 'Happiness by Region and PPP',
       fill = 'happiness',
       caption = 'Source: https://data.worldbank.org/indicator/') +
  coord_flip() +
  theme_minimal_vgrid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
  # expansion() replaces the deprecated expand_scale(); same arguments.
  scale_y_continuous(expand = expansion(mult = c(0, 0.5)))
Next, we looked at purchasing power parity in a country. Purchasing power parity, or PPP, is a metric that compares economic productivity and standards of living between countries. It compares different currencies through a “basket of goods” approach, that is, how much one can buy for how much one makes. This statistic was particularly interesting because we expected countries with a higher PPP to have greater happiness, but there seems to be almost no correlation at all: -0.16, to be exact. The two happiest regions, North America and Europe & Central Asia, actually have the lowest average PPP, which may be because of the higher standards of living in western countries.
################
# CHART 1
# Suicide rate vs Happiness
################
# 2016 rows that have both a suicide rate and a happiness score, reduced to
# the columns used by the two scatterplots below.
suicide_happiness <- final_data %>%
  select(
    country, year, life_ladder,
    log_gdp_per_capita, incomePperson, suicideR
  ) %>%
  filter(
    year == 2016,
    !is.na(suicideR),
    !is.na(life_ladder)
  )
#glimpse(suicide_happiness)
# Scatterplot: suicide rate vs happiness score (2016).
# The R label is computed from the plotted data instead of being typed in,
# so it cannot disagree with the points. The former scale_color_viridis()
# call was dropped because no colour aesthetic is mapped here.
ggplot(suicide_happiness) +
  geom_point(aes(x = suicideR, y = life_ladder)) +
  theme_half_open(font_size = 11) +
  labs(title = "Suicide Rate vs Happiness 2016",
       y = "Happiness Score",
       x = "Suicide Rate",
       caption = "Source: https://apps.who.int/gho/data/node.main.MHSUICIDE") +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
  annotate('text', x = 25, y = 7.5, hjust = 0,
           label = str_c(
             'R = ',
             round(cor(suicide_happiness$suicideR,
                       suicide_happiness$life_ladder), 4)
           ),
           size = 3.5, color = 'red')
# Same scatterplot, with each country coloured by its income per person.
# The R label is computed from the plotted data instead of being typed in.
ggplot(suicide_happiness) +
  geom_point(aes(x = suicideR, y = life_ladder, color = incomePperson),
             alpha = 0.8, size = 2.2) +
  theme_half_open(font_size = 11) +
  labs(title = "Suicide Rate vs Happiness 2016",
       y = "Happiness Score",
       x = "Suicide Rate",
       color = "Income Per Person",
       caption = "Source: https://apps.who.int/gho/data/node.main.MHSUICIDE") +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
  scale_color_viridis(discrete = FALSE) +
  annotate('text', x = 25, y = 7.5, hjust = 0,
           label = str_c(
             'R = ',
             round(cor(suicide_happiness$suicideR,
                       suicide_happiness$life_ladder), 4)
           ),
           size = 3.5, color = 'red')
# R value
########################################################
# Pearson correlation between suicide rate and happiness score.
# cor() evaluates the same formula that was previously spelled out by hand
# (covariance over the product of the standard deviations) and takes the
# sample size from the data rather than from a hard-coded 122.
# suicide_happiness has no missing values in either column (both were
# filtered with !is.na when it was built), so no `use =` argument is needed.
r_s_h <- cor(suicide_happiness$suicideR, suicide_happiness$life_ladder)
r_s_h
## [1] 0.2504473
# NOTE: this value was previously recorded as 0.2695; the printed result above is 0.2504473
This first chart displays a scatter plot of the suicide rate vs happiness score for each country in the year 2016. As shown by the chart, there is at most a weak relationship between the two factors, which is supported by the calculated r value of about 0.25.
However, when each point on the chart is colored by the country’s average income per person, it is clear that a majority of the countries represented have very low income.
Therefore, from this first graph we were able to conclude that happiness score and suicide rate do not have a strong effect on one another; income and happiness, however, might. This led to the creation of our next graph, where we compare happiness and income.
################
# CHART 2
# Happiness vs Income
################
# 2019 rows that have both an income-per-person figure and a happiness score.
# The glimpse() that used to sit at the head of this pipeline was a leftover
# inspection call; it printed the whole structure of final_data into the
# report and did not affect the result.
suicide_happiness2 <- final_data %>%
  filter(year == 2019) %>%
  filter(!is.na(incomePperson)) %>%
  filter(!is.na(life_ladder))
## Observations: 2,421
## Variables: 21
## $ country <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#glimpse(suicide_happiness2)
# Mean happiness score of the countries plotted; drawn as the red reference
# line in the chart below.
mean_y <- mean(suicide_happiness2$life_ladder)
#mean_y
# (value recorded from an earlier run: 5.406222)

# Scatterplot: income per person vs happiness score (2019).
# The reference line and the R label are taken from the data (mean_y and
# cor()) instead of hard-coded numbers, so they stay correct if the data
# changes. The former scale_color_viridis() call was dropped: the point
# colour is a constant, so no colour scale is in use.
ggplot(suicide_happiness2) +
  geom_point(aes(x = incomePperson, y = life_ladder),
             color = "Steelblue", alpha = 0.88) +
  theme_half_open(font_size = 11) +
  labs(title = "Happiness Score vs Income 2019",
       x = "Income per Person",
       y = "Happiness Score",
       caption = "Source: https://www.gapminder.org/data/") +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
  geom_hline(yintercept = mean_y, col = 'red') +
  annotate('text', x = 30000, y = 5.25, hjust = 0,
           label = 'Mean Happiness score for countries represented',
           color = 'red',
           size = 3) +
  annotate('text', x = 90000, y = 8, hjust = 0,
           label = str_c(
             'R = ',
             round(cor(suicide_happiness2$incomePperson,
                       suicide_happiness2$life_ladder), 3)
           ),
           color = 'red',
           size = 3)
In this chart we compared income per person and the overall happiness score of the countries. The R value of the two factors was calculated and found to be 0.728; this is a high correlation coefficient, and therefore the two factors might have an impact on one another.
When looking at the chart, the majority of the happiness points that fall below the mean happiness score also have an income equal to or less than 30000. However, there are no points with an income above 30000 that fall below the mean happiness score. Therefore it can be said that with a high income it is more likely that your happiness level will be higher and above the mean.
#FIND THE R VALUE
###########################################################
# X VALUE, X = INCOME PER PERSON
# MEAN
# The income column is named incomePperson. The previous reference to
# `income_per_person` matched no column, so the mean and every quantity
# derived from it came out as NA.
mean_x <- mean(suicide_happiness2$incomePperson)
#mean_x
# (value recorded from an earlier run: 19974.12)

# Pearson correlation between income per person and happiness score.
# cor() evaluates the same formula that was previously written out by hand
# and takes the sample size from the data rather than a hard-coded 144.
# Both columns were filtered with !is.na when suicide_happiness2 was built.
r_x_y <- cor(suicide_happiness2$incomePperson,
             suicide_happiness2$life_ladder)
#r_x_y
# (value recorded from an earlier run: .7277, n = 144)
#########################################################################
# income per person bar chart: data preparation
# 2018 slice of final_data (only referenced by the commented glimpse below).
check1 <- filter(final_data, year == 2018)
#glimpse(check1)

# Mean income and mean happiness per region, over all rows that have an
# income figure, a happiness score and a region; regions are then ordered by
# mean income for plotting.
bar_income <- final_data %>%
  filter(
    !is.na(incomePperson),
    !is.na(life_ladder),
    !is.na(region)
  ) %>%
  group_by(region) %>%
  summarise(
    mean_income = mean(incomePperson),
    mean_happy = mean(life_ladder)
  ) %>%
  mutate(region = fct_reorder(region, mean_income))
#glimpse(bar_income)
# Horizontal bars of mean income per region, filled by mean happiness.
ggplot(bar_income) +
  geom_col(aes(x = region, y = mean_income, fill = mean_happy),
           width = 0.7) +
  # expansion() replaces the deprecated expand_scale(); same arguments:
  # no padding at zero, 50% head-room on the value side.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.5))
  ) +
  coord_flip() +
  theme_minimal_vgrid(font_size = 11) +
  labs(title = "Mean Income Per Region",
       y = "Mean Income",
       x = "Region",
       fill = "Mean Happiness",
       caption = "Source: https://www.gapminder.org/data/") +
  scale_fill_viridis(
    discrete = FALSE, option = 'plasma') +
  # Single theme() call: bold-italic caption, and the region axis title is
  # hidden (the x = "Region" label above is therefore not displayed).
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1),
        axis.title.y = element_blank())
This chart shows how each region compares to one another in terms of mean income and mean happiness level over the years 2008-2018. This chart further proves the idea that a higher income per person means a higher happiness score. The countries with the highest income per person are most happy, while the countries with the lowest income per person are the least happy.
After observing the impacts of the economy on happiness, we switch to the social impacts. Do social factors also play a role in a region's happiness? The variables we looked into range from social support to one's perception of corruption.
Social Factors
Region | Mean Social Support | Mean Healthy Life Expectancy | Mean Freedom Score | Mean Trust in Government | Mean Perception of Corruption |
---|---|---|---|---|---|
East Asia & Pacific | .87 | 67.81 | .87 | .65 | .62 |
Europe & Central Asia | .88 | 70.05 | .79 | .43 | .67 |
Latin America & Caribbean | .84 | 66.84 | .83 | .34 | .79 |
Middle East & North Africa | .85 | 69.87 | .72 | .43 | .78 |
North America | .91 | 70.95 | .89 | .46 | .54 |
South Asia | .71 | 62.52 | .85 | .68 | .79 |
Sub-Saharan Africa | .70 | 56.57 | .74 | .60 | .77 |
# Social factors: data preparation
# 2018 rows with a known region, trust-in-government score and
# perception-of-corruption score.
EighteenData <- final_data %>%
  filter(
    year == 2018,
    !is.na(region),
    !is.na(trustInGov),
    !is.na(perceptionCorrupt)
  )
#EighteenData

# Per-region means of every social factor and of happiness, plus one
# re-ordered copy of `region` per factor so each bar chart can sort its bars
# by the factor it displays.
summaryEighteen <- EighteenData %>%
  group_by(region) %>%
  summarise(
    meanLife = mean(lifeExpAtBirth),
    meanSocial = mean(social_support),
    meanFreedom = mean(freedom),
    meanTrust = mean(trustInGov),
    meanGenerosity = mean(generosity),
    meanPerception = mean(perceptionCorrupt),
    meanHappiness = mean(life_ladder)
  ) %>%
  mutate(
    region1 = fct_reorder(region, meanSocial),
    region2 = fct_reorder(region, meanFreedom),
    region3 = fct_reorder(region, meanTrust),
    region4 = fct_reorder(region, meanGenerosity),
    region5 = fct_reorder(region, meanPerception),
    region6 = fct_reorder(region, meanLife)
  )
#summaryEighteen
# social support
# Correlation between social support and happiness (complete pairs only),
# rounded to two decimals for the scatterplot label.
corSocial <- round(cor(
  EighteenData$social_support, EighteenData$life_ladder,
  use = "complete.obs"), 2)

# Horizontal bars of mean social support per region, filled by mean happiness.
# Axis titles are given per aesthetic (x = region, y = value); coord_flip()
# only changes where each axis is drawn, so the titles were previously swapped.
ggplot(summaryEighteen) +
  geom_col(aes(x = region1, y = meanSocial, fill = meanHappiness)) +
  scale_fill_viridis(option = 'plasma') +
  coord_flip() +
  theme_minimal_vgrid() +
  # expansion() replaces the deprecated expand_scale(); same arguments.
  scale_y_continuous(expand = expansion(mult = c(0, .25))) +
  labs(x = 'Region', y = 'Mean Social Support',
       title = 'Mean Social Support Score per Region',
       fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Scatterplot: social support vs happiness score, with r printed in red.
ggplot(EighteenData, aes(x = social_support, y = life_ladder)) +
  geom_point() +
  annotate(
    'text', x = .55, y = 7,
    label = str_c('r = ', corSocial),
    color = 'red'
  ) +
  labs(
    x = 'Social Support',
    y = 'Happiness Score',
    title = 'Correlation between Social Support and Happiness',
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1)
  )
Social support is a strong indication of a region's happiness. We see that the happiest regions also have the most social support. This is highlighted in the correlation chart, where we see that the R value of happiness and social support is very high.
#Correlation between happiness and life expectancy
# Horizontal bars of mean healthy life expectancy per region, filled by mean
# happiness. Axis titles are given per aesthetic (x = region, y = value);
# coord_flip() only changes where each axis is drawn, so the titles were
# previously swapped.
ggplot(summaryEighteen) +
  geom_col(aes(x = region6, y = meanLife, fill = meanHappiness)) +
  scale_fill_viridis(option = 'plasma') +
  coord_flip() +
  theme_minimal_vgrid() +
  # expansion() replaces the deprecated expand_scale(); same arguments.
  scale_y_continuous(expand = expansion(mult = c(0, .25))) +
  labs(x = 'Region', y = 'Mean Healthy Life Expectancy at Birth',
       title = 'Mean of Healthy Life Expectancy at Birth per Region',
       fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Correlation between healthy life expectancy and happiness (complete pairs
# only), rounded to two decimals for the label.
corLifeExp <- with(
  EighteenData,
  round(cor(lifeExpAtBirth, life_ladder, use = "complete.obs"), 2)
)

# Scatterplot: healthy life expectancy vs happiness score, r printed in red.
ggplot(EighteenData, aes(x = lifeExpAtBirth, y = life_ladder)) +
  geom_point() +
  annotate(
    'text', x = 55, y = 7,
    label = str_c('r = ', corLifeExp),
    color = 'red'
  ) +
  labs(
    x = 'Healthy Life Expectancy at Birth',
    y = 'Happiness Score',
    title = 'Correlation between Life Expectancy and Happiness',
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1)
  )
Healthy life expectancy is also a strong indication of a region's happiness. We see that the happiest regions also offer the ability to live a long and healthy life. This is highlighted in the correlation chart, where we see that the R value of happiness and life expectancy is very high.
#Correlation between happiness and freedom
# Horizontal bars of mean freedom score per region, filled by mean happiness.
# Axis titles are given per aesthetic (x = region, y = value); coord_flip()
# only changes where each axis is drawn, so the titles were previously swapped.
ggplot(summaryEighteen) +
  geom_col(aes(x = region2, y = meanFreedom, fill = meanHappiness)) +
  scale_fill_viridis(option = 'plasma') +
  coord_flip() +
  theme_minimal_vgrid() +
  # expansion() replaces the deprecated expand_scale(); same arguments.
  scale_y_continuous(expand = expansion(mult = c(0, .25))) +
  labs(x = 'Region', y = 'Mean Level of Freedom',
       title = 'Mean Level of Freedom per Region',
       fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Correlation between freedom and happiness (complete pairs only), rounded to
# two decimals for the label.
corFreedom <- round(cor(
  EighteenData$freedom, EighteenData$life_ladder,
  use = "complete.obs"), 2)

# Scatterplot: freedom vs happiness score, with r printed in red.
# The title previously read 'Feedom'.
ggplot(EighteenData) +
  geom_point(aes(x = freedom, y = life_ladder)) +
  annotate('text', x = .5, y = 7,
           label = str_c('r = ', corFreedom), color = 'red') +
  theme_minimal() +
  labs(x = 'Freedom', y = 'Happiness Score',
       title = 'Correlation between Freedom and Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))
Initially one would think there is a strong correlation between freedom and happiness, on the assumption that a happy life consists of being free enough to make your own decisions. Although North America has a high level of both freedom and happiness, the charts also show that countries in South Asia such as India and Afghanistan, two countries with supposedly high government restrictions, report a high level of freedom as well. Freedom is a hard variable to compare with happiness, as the definition of freedom varies across regions.
#Correlation between happiness and trustInGov
# Horizontal bars of mean trust in government per region, filled by mean
# happiness. Axis titles are given per aesthetic (x = region, y = value);
# coord_flip() only changes where each axis is drawn, so the titles were
# previously swapped.
ggplot(summaryEighteen) +
  geom_col(aes(x = region3, y = meanTrust, fill = meanHappiness)) +
  scale_fill_viridis(option = 'plasma') +
  coord_flip() +
  theme_minimal_vgrid() +
  # expansion() replaces the deprecated expand_scale(); same arguments.
  scale_y_continuous(expand = expansion(mult = c(0, .25))) +
  labs(x = 'Region', y = 'Mean Trust in Government',
       title = 'Mean Trust in Government per Region',
       fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Correlation between trust in government and happiness (complete pairs
# only), rounded to two decimals for the label.
corTrust <- with(
  EighteenData,
  round(cor(trustInGov, life_ladder, use = "complete.obs"), 2)
)

# Scatterplot: trust in government vs happiness score, r printed in red.
ggplot(EighteenData, aes(x = trustInGov, y = life_ladder)) +
  geom_point() +
  annotate(
    'text', x = .2, y = 7.5,
    label = str_c('r = ', corTrust),
    color = 'red'
  ) +
  labs(
    x = 'Trust in the Government',
    y = 'Happiness Score',
    title = 'Correlation between Trust in the Government and Happiness',
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1)
  )
We can see here that trust in government does not predict one’s level of happiness to any meaningful extent. This could relate to the results gathered from the freedom correlation charts, as citizens of a country usually believe their government has their best interest in mind.
#Correlation between happiness and perceptions of corruption
# Horizontal bars of mean perception of corruption per region, filled by mean
# happiness. Axis titles are given per aesthetic (x = region, y = value);
# coord_flip() only changes where each axis is drawn, so the titles were
# previously swapped.
ggplot(summaryEighteen) +
  geom_col(aes(x = region5, y = meanPerception, fill = meanHappiness)) +
  scale_fill_viridis(option = 'plasma') +
  coord_flip() +
  theme_minimal_vgrid() +
  # expansion() replaces the deprecated expand_scale(); same arguments.
  scale_y_continuous(expand = expansion(mult = c(0, .25))) +
  labs(x = 'Region', y = 'Mean Perception of Corruption',
       title = 'Mean Perception of Corruption per Region',
       fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))
# Correlation between perception of corruption and happiness (complete pairs
# only), rounded to two decimals for the label.
corPerception <- round(cor(
  EighteenData$perceptionCorrupt, EighteenData$life_ladder,
  use = "complete.obs"), 2)

# Scatterplot: perception of corruption vs happiness score, r printed in red.
# The title previously read 'Corrpution'.
ggplot(EighteenData) +
  geom_point(aes(x = perceptionCorrupt, y = life_ladder)) +
  annotate('text', x = .25, y = 6,
           label = str_c('r = ', corPerception), color = 'red') +
  theme_minimal() +
  labs(x = 'Perception of Corruption', y = 'Happiness Score',
       title = 'Correlation between Perception of Corruption and Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))
Lastly, perception of corruption has a moderate negative correlation with happiness, so regions with a high perception of corruption display low levels of happiness. While this seems to make sense, it can be argued, as we argued for freedom, that the responses are subject to bias, influence from the country's government, and a level of fear.
################
# DATA
# TOP 10 COUNTRIES
################
# Show the column names and types of final_data before selecting from it.
final_data %>% glimpse()
## Observations: 2,421
## Variables: 21
## $ country <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Ten highest happiness scores of 2019: keep the 2019 rows that have a score,
# sort from highest to lowest and take the first ten rows.
top_10 <- final_data %>%
  filter(year == 2019, !is.na(life_ladder)) %>%
  arrange(desc(life_ladder)) %>%
  slice(seq_len(10))
# THE TOP 10 COUNTRIES
# finland, denmark, norway, iceland, netherlands, switzerland, sweden, new zealand, canada, austria
top_10 %>% glimpse()
## Observations: 10
## Variables: 21
## $ country <chr> "Finland", "Denmark", "Norway", "Iceland", "N…
## $ year <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 201…
## $ inequality <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ life_ladder <dbl> 7.769, 7.600, 7.554, 7.494, 7.488, 7.480, 7.3…
## $ log_gdp_per_capita <dbl> 1.340, 1.383, 1.488, 1.380, 1.396, 1.452, 1.3…
## $ social_support <dbl> 1.587, 1.573, 1.582, 1.624, 1.522, 1.526, 1.4…
## $ lifeExpAtBirth <dbl> 0.986, 0.996, 1.028, 1.026, 0.999, 1.052, 1.0…
## $ freedom <dbl> 0.596, 0.592, 0.603, 0.591, 0.557, 0.572, 0.5…
## $ generosity <dbl> 0.153, 0.252, 0.271, 0.354, 0.322, 0.263, 0.2…
## $ perceptionCorrupt <dbl> 0.393, 0.410, 0.341, 0.118, 0.298, 0.343, 0.3…
## $ positive_affect <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ negative_affect <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ trustInGov <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ demoQuality <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ ppp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ country_code.y <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ pppp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ region <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ income_group <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ incomePperson <dbl> 42400, 48300, 66300, 47900, 50500, 59000, 472…
## $ suicideR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
# Print the ten selected rows as a table.
top_10
## # A tibble: 10 x 21
## country year inequality life_ladder log_gdp_per_capita
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Finland 2019 NA 7.77 1.34
## 2 Denmark 2019 NA 7.6 1.38
## 3 Norway 2019 NA 7.55 1.49
## 4 Iceland 2019 NA 7.49 1.38
## 5 Netherlands 2019 NA 7.49 1.40
## 6 Switzerland 2019 NA 7.48 1.45
## 7 Sweden 2019 NA 7.34 1.39
## 8 New Zealand 2019 NA 7.31 1.30
## 9 Canada 2019 NA 7.28 1.36
## 10 Austria 2019 NA 7.25 1.38
## social_support lifeExpAtBirth freedom generosity perceptionCorrupt
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.59 0.986 0.596 0.153 0.393
## 2 1.57 0.996 0.592 0.252 0.41
## 3 1.58 1.03 0.603 0.271 0.341
## 4 1.62 1.03 0.591 0.354 0.118
## 5 1.52 0.999 0.557 0.322 0.298
## 6 1.53 1.05 0.572 0.263 0.343
## 7 1.49 1.01 0.574 0.267 0.373
## 8 1.56 1.03 0.585 0.33 0.38
## 9 1.50 1.04 0.584 0.285 0.308
## 10 1.48 1.02 0.532 0.244 0.226
## positive_affect negative_affect trustInGov demoQuality ppp
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## 7 NA NA NA NA NA
## 8 NA NA NA NA NA
## 9 NA NA NA NA NA
## 10 NA NA NA NA NA
## country_code.y pppp region income_group incomePperson suicideR
## <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 <NA> NA <NA> <NA> 42400 NA
## 2 <NA> NA <NA> <NA> 48300 NA
## 3 <NA> NA <NA> <NA> 66300 NA
## 4 <NA> NA <NA> <NA> 47900 NA
## 5 <NA> NA <NA> <NA> 50500 NA
## 6 <NA> NA <NA> <NA> 59000 NA
## 7 <NA> NA <NA> <NA> 47200 NA
## 8 <NA> NA <NA> <NA> 36500 NA
## 9 <NA> NA <NA> <NA> 44200 NA
## 10 <NA> NA <NA> <NA> 46900 NA
################
# DATA
# BOTTOM 10 COUNTRIES
################
# Show the column names and types of final_data before selecting from it.
final_data %>% glimpse()
## Observations: 2,421
## Variables: 21
## $ country <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Ten lowest happiness scores of 2019: keep the 2019 rows that have a score,
# sort from lowest to highest and take the first ten rows.
bottom_10 <- final_data %>%
  filter(year == 2019, !is.na(life_ladder)) %>%
  arrange(life_ladder) %>%
  slice(seq_len(10))
bottom_10 %>% glimpse()
## Observations: 10
## Variables: 21
## $ country <chr> "South Sudan", "Central African Republic", "A…
## $ year <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 201…
## $ inequality <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ life_ladder <dbl> 2.853, 3.083, 3.203, 3.231, 3.334, 3.380, 3.4…
## $ log_gdp_per_capita <dbl> 0.306, 0.026, 0.350, 0.476, 0.359, 0.287, 0.1…
## $ social_support <dbl> 0.575, 0.000, 0.517, 0.885, 0.711, 1.163, 0.5…
## $ lifeExpAtBirth <dbl> 0.295, 0.105, 0.361, 0.499, 0.614, 0.463, 0.4…
## $ freedom <dbl> 0.010, 0.225, 0.000, 0.417, 0.555, 0.143, 0.4…
## $ generosity <dbl> 0.202, 0.235, 0.158, 0.276, 0.217, 0.108, 0.2…
## $ perceptionCorrupt <dbl> 0.091, 0.035, 0.025, 0.147, 0.411, 0.077, 0.0…
## $ positive_affect <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ negative_affect <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ trustInGov <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ demoQuality <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ ppp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ country_code.y <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ pppp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ region <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ income_group <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ incomePperson <dbl> 1860, 794, 1760, 2980, 2110, 2340, 1180, 2900…
## $ suicideR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
# Print the ten selected rows as a table.
bottom_10
## # A tibble: 10 x 21
## country year inequality life_ladder log_gdp_per_capita
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 South Sudan 2019 NA 2.85 0.306
## 2 Central African Republic 2019 NA 3.08 0.026
## 3 Afghanistan 2019 NA 3.20 0.35
## 4 Tanzania 2019 NA 3.23 0.476
## 5 Rwanda 2019 NA 3.33 0.359
## 6 Yemen 2019 NA 3.38 0.287
## 7 Malawi 2019 NA 3.41 0.191
## 8 Syria 2019 NA 3.46 0.619
## 9 Botswana 2019 NA 3.49 1.04
## 10 Haiti 2019 NA 3.60 0.323
## social_support lifeExpAtBirth freedom generosity perceptionCorrupt
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.575 0.295 0.01 0.202 0.091
## 2 0 0.105 0.225 0.235 0.035
## 3 0.517 0.361 0 0.158 0.025
## 4 0.885 0.499 0.417 0.276 0.147
## 5 0.711 0.614 0.555 0.217 0.411
## 6 1.16 0.463 0.143 0.108 0.077
## 7 0.56 0.495 0.443 0.218 0.089
## 8 0.378 0.44 0.013 0.331 0.141
## 9 1.14 0.538 0.455 0.025 0.1
## 10 0.688 0.449 0.026 0.419 0.11
## positive_affect negative_affect trustInGov demoQuality ppp
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## 7 NA NA NA NA NA
## 8 NA NA NA NA NA
## 9 NA NA NA NA NA
## 10 NA NA NA NA NA
## country_code.y pppp region income_group incomePperson suicideR
## <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 <NA> NA <NA> <NA> 1860 NA
## 2 <NA> NA <NA> <NA> 794 NA
## 3 <NA> NA <NA> <NA> 1760 NA
## 4 <NA> NA <NA> <NA> 2980 NA
## 5 <NA> NA <NA> <NA> 2110 NA
## 6 <NA> NA <NA> <NA> 2340 NA
## 7 <NA> NA <NA> <NA> 1180 NA
## 8 <NA> NA <NA> <NA> 2900 NA
## 9 <NA> NA <NA> <NA> 16800 NA
## 10 <NA> NA <NA> <NA> 1640 NA
# THE BOTTOM 10 COUNTRIES
# South Sudan, Central African Republic, Afghanistan, Tanzania, Rwanda, Yemen, Malawi, Syria, Botswana, Haiti
################
# GRAPH 3
# TOP / BOTTOM 10 COUNTRIES
################
# combine the data
# Each row is tagged with the table it came from, instead of inferring the
# group from a score threshold (the old `life_ladder > 5` test also attached
# the labels the wrong way round: high scores were tagged "bottom").
# `color` is a factor with levels top, bottom so that the unnamed fill values
# used by the charts keep their original assignment: first value for the top
# ten, second value for the bottom ten.
ten <- bind_rows(
  mutate(top_10, color = "top"),
  mutate(bottom_10, color = "bottom")
) %>%
  mutate(
    country = fct_reorder(country, life_ladder),
    color = factor(color, levels = c("top", "bottom"))
  )
#glimpse(ten)
ggplot(ten) +
geom_col(aes(x = country, y = life_ladder, fill = color),
width = 0.7, alpha = 0.8) +
scale_y_continuous(
expand = expansion(mult = c(0, 0.05))
) +
coord_flip() +
theme_minimal_vgrid(font_size = 11) +
scale_fill_manual( values = c( 'steelblue', 'lightblue')) +
theme(legend.position = 'none',
axis.title.y = element_blank(),
plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
labs(
y = "Overall Happiness Score",
title = "Top 10 and Bottom 10 Overall Happiness Score",
subtitle = "2019",
caption = "Source: https://worldhappiness.report/ed/2019/#read"
)
The ten countries with the highest happiness scores and the ten countries with the lowest happiness scores were identified and compared. The graph shows that the top ten countries’ overall happiness scores are about double those of the bottom ten countries. The chart also shows that almost all of the top ten countries are in Europe, the exceptions being Canada and New Zealand, while the majority of the bottom ten countries are located in Africa and Asia.
################
# GRAPH 4
# TOP / BOTTOM 10 COUNTRIES INCOME COMPARISON
################
# Horizontal bars of incomePperson for every country in `ten`,
# filled by the `color` group; the legend is hidden.
ggplot(
  data = ten,
  mapping = aes(x = country, y = incomePperson, fill = color)
) +
  geom_col(alpha = 0.8, width = 0.7) +
  coord_flip() +
  scale_fill_manual(values = c("steelblue", "lightblue")) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    subtitle = "Income Per Person Comparison, 2019",
    y = "Income Per Person",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    plot.caption = element_text(face = "bold.italic", hjust = 1),
    axis.title.y = element_blank(),
    legend.position = "none"
  )
As displayed above, income differs drastically between the top ten and bottom ten countries. The average income for the bottom ten countries was found to be around 3,400 dollars, and the average income for the top ten countries was found to be around 50,000 dollars. The top ten countries have an income around fourteen times greater than the bottom ten.
This goes to show that high-income, developed countries will most likely have a higher happiness score than low-income, under-developed countries.
# Stack the two tables and order countries by log GDP per capita. `.id` tags
# every row with its source table, so the group label no longer depends on a
# life_ladder cutoff (the previous ifelse() assigned the labels the wrong way
# round). Levels are ordered top, bottom to match the palette order.
tenGDP <- bind_rows(top = top_10, bottom = bottom_10, .id = "color") %>%
  mutate(
    country = fct_reorder(country, log_gdp_per_capita),
    color = factor(color, levels = c("top", "bottom"))
  )
# Horizontal bar chart of log_gdp_per_capita per country, filled by group.
ggplot(tenGDP) +
  geom_col(
    aes(x = country, y = log_gdp_per_capita, fill = color),
    width = 0.7, alpha = 0.8
  ) +
  # No padding below zero, 5% padding above the longest bar.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05))
  ) +
  coord_flip() +
  theme_minimal_vgrid(font_size = 11) +
  # Named values bind each colour to its group explicitly.
  scale_fill_manual(values = c(top = "steelblue", bottom = "lightblue")) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    plot.caption = element_text(face = "bold.italic", hjust = 1)
  ) +
  labs(
    # The axis shows log_gdp_per_capita, not income per person.
    y = "Log GDP per Capita",
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    subtitle = "log(gdp) Comparison, 2019",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  )
As displayed above, there is a major difference in economic health between the top ten and bottom ten countries. This goes to show that a healthy, smoothly running economy is strongly likely to increase the happiness of a country’s population.
# Horizontal bars of social_support for every country in
# `ten_social_health`, filled by the `color` group; the legend is hidden.
ggplot(
  data = ten_social_health,
  mapping = aes(x = country, y = social_support, fill = color)
) +
  geom_col(alpha = 0.8, width = 0.7) +
  coord_flip() +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  scale_fill_manual(values = c("steelblue", "lightblue")) +
  labs(
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    subtitle = "Social Support, 2019",
    y = "Social Support",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    plot.caption = element_text(face = "bold.italic", hjust = 1),
    axis.title.y = element_blank(),
    legend.position = "none"
  )
This chart compares the social support scores of each country. Again, the level of social support represents how strongly people in the country can rely on the support of family and friends.
This graph shows that all of the top ten countries have a high level of social support. There is more variety amongst the bottom ten countries, but the majority of them have less than half the social support that the top ten have. We conclude that social support heavily influences a country’s overall happiness score.
# HEALTHY LIFE EXPECTANCY
# Horizontal bar chart of lifeExpAtBirth for every country in
# `ten_social_health`, filled by the `color` group; the legend is hidden.
ggplot(ten_social_health) +
  geom_col(
    aes(x = country, y = lifeExpAtBirth, fill = color),
    width = 0.7, alpha = 0.8
  ) +
  scale_fill_manual(values = c("steelblue", "lightblue")) +
  # No padding below zero, 5% padding above the longest bar.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05))
  ) +
  coord_flip() +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    plot.caption = element_text(face = "bold.italic", hjust = 1)
  ) +
  labs(
    y = "Healthy Life Expectancy",
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    # Spelling corrected: this text is rendered on the plot.
    subtitle = "Healthy Life Expectancy, 2019",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  )
This chart displays the difference in healthy life expectancy between the top ten countries and the bottom ten. The top ten countries all have a high healthy life expectancy, while people in the bottom ten countries live much shorter lives. People in countries where one can expect a long and prosperous life are more likely to live a happier life.
In conclusion, the factors with a high influence on, or correlation to, a region’s or country’s happiness are concentrated in economic and social factors: namely, GDP per capita, income per person, social support and healthy life expectancy. The visualizations reflecting freedom, trust in the government, and perception of corruption disproved the original claim that factors directly involving a country’s governance would strongly impact one’s level of happiness, as the correlation coefficients were minimal to moderate.
Through this analysis, we can agree that happiness most likely comes from a level of satisfaction and safety in one’s life.
As countries evolve, we ask whether these four factors remain correlated with a country’s increase or decrease in happiness. For example, when a population’s happiness decreases, is that directly related to a decrease in its income per person or life expectancy?
This report leads to more questions about the pursuit of happiness, and what happiness truly looks like across the globe.
Variable | Data type | Description |
---|---|---|
Country | character | Country name |
Year | numerical | Year data was collected |
Life_ladder | numerical | Average score of people when asked to score their life on a scale of 0 to 10, 10 being the best and 0 being the worst possible life. People rated based on where they felt they were at the time |
Log_gdp_per_capita | numerical | Log of the gross domestic product per capita for each country |
Healthy_life_expectancy | numerical | Life expectancy based on data from the World Health Organization |
Social support | numerical | Reflects “having someone to count on in times of need” on a 0-1 scale in the Gallup world report |
Freedom | numerical | Average response when people were asked about their freedom to choose what to do with their life, according to the Gallup world report |
Generosity | numerical | Data from the Gallup world report based on the number of people who gave to charity in the past month |
Variable | Data type | Description |
---|---|---|
Country | character | Country victim lost their life in |
Sex | character | Gender of victim (Male/female/Both) |
Year | double | Year incident happened |
Age | character | Age category of victim |
Suicide_no | double | Number of deaths by intentional harm |
Population | double | Number of all people living in the country. |
Variable | Data type | Description |
---|---|---|
Country | character | Country victim lost their life in |
Sex | character | Gender of victim (Male/female/Both) |
Year | double | Year incident happened |
suicide_rate | double | rate of crude suicide deaths per 100,000 |
Variable | Data type | Description |
---|---|---|
country | character | Country name |
country_code | character | 3 letter country abbreviation |
region | character | Country region |
year | numerical | Year data was collected |
Ppp (purchasing power parity gdp) | numerical | The number of units of a country’s currency required to buy the same amount of goods and services in the domestic market as one U.S. dollar would buy in the United States. This conversion factor is for GDP. |
Pppp (purchasing power parity private) | numerical | The number of units of a country’s currency required to buy the same amount of goods and services in the domestic market as one U.S. dollar would buy in the United States. This conversion factor is for private consumption (i.e., household final consumption expenditure). |
Variable | Data type | Description |
---|---|---|
Country | character | Country name |
Year | double | year |
Income_per_person | double | gross domestic product per person adjusted for differences in purchasing power to show the overall income per person for each country |
Income_inequality | double | How much the distribution of income deviates from perfectly equal distribution |