# Global chunk options: keep warnings and messages out of the knitted report.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# ---- Load raw data ----
# Every input is read from the project's data_raw/ folder via here::here().

# Happiness workbook and the wide (one column per year) Gini table.
happiness <- read_excel(here::here("data_raw", "happinessdata.xls"))
income_inequality <- read_csv(here::here("data_raw", "gini.csv"))

# Both PPP workbooks keep their values on the "Data" sheet; the first three
# rows are skipped before the header row is read.
PPP <- read_excel(
  here::here("data_raw", "PPP.xls"),
  sheet = "Data",
  skip = 3
)
PPPP <- read_excel(
  here::here("data_raw", "PPPP.xls"),
  sheet = "Data",
  skip = 3
)

# Country metadata (used further down for region / income group) comes from
# the "Metadata - Countries" sheet of the PPP workbook.
counrty_region <- read_excel(
  here::here("data_raw", "PPP.xls"),
  sheet = "Metadata - Countries"
)
  
# Normalise the happiness column names with clean_names().
happiness <- clean_names(happiness)

# Country metadata: normalise names and drop the two free-text columns.
counrty_region_clean <- counrty_region %>%
  clean_names() %>%
  select(-c(special_notes, table_name))

# Keep only rows that have a happiness score (life_ladder) and only the
# variables used in the analysis.
# NOTE(review): the original author asked for the variable list to be
# double-checked - confirm it is complete.
happiness_clean <- happiness %>%
  filter(!is.na(life_ladder)) %>%
  select(
    country_name,
    year,
    life_ladder,
    log_gdp_per_capita,
    social_support,
    healthy_life_expectancy_at_birth,
    freedom_to_make_life_choices,
    generosity,
    perceptions_of_corruption,
    positive_affect,
    negative_affect,
    confidence_in_national_government,
    democratic_quality
  )

# ---- PPP tables: wide -> long ----
# Both workbooks have one column per year (1960-2019). Each is stacked into
# (year, value) pairs, restricted to years after 2005 with a non-missing
# value, and stripped of the indicator name/code columns.

#glimpse(PPP)
PPP_clean <- PPP %>%
  gather(key = "year", value = "ppp", "1960":"2019") %>%
  mutate(year = as.numeric(year)) %>%
  filter(year > 2005, !is.na(ppp)) %>%
  clean_names() %>%
  select(-c(indicator_name, indicator_code))

#glimpse(PPP_clean)

# Same reshaping for the second PPP workbook; values land in `pppp`.
PPPP_clean <- PPPP %>%
  gather(key = "year", value = "pppp", "1960":"2019") %>%
  mutate(year = as.numeric(year)) %>%
  filter(year > 2005, !is.na(pppp)) %>%
  clean_names() %>%
  select(-c(indicator_name, indicator_code))

#glimpse(PPPP_clean)

# Income inequality: inspect the raw table before it is reshaped and joined
# with the happiness data.

dplyr::glimpse(income_inequality)
## Observations: 195
## Variables: 242
## $ country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola"…
## $ `1800`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1801`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1802`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1803`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1804`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1805`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1806`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1807`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1808`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1809`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1810`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1811`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1812`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1813`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1814`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1815`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1816`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.7, 31.5, 38.7, 33…
## $ `1817`  <dbl> 30.5, 38.9, 56.2, 40.0, 57.2, 40.0, 47.6, 31.5, 38.7, 33…
## $ `1818`  <dbl> 30.5, 38.9, 56.3, 40.0, 57.1, 40.0, 47.5, 31.5, 38.7, 33…
## $ `1819`  <dbl> 30.5, 38.9, 56.4, 40.0, 56.9, 40.0, 47.3, 31.5, 38.7, 33…
## $ `1820`  <dbl> 30.5, 38.9, 56.5, 40.0, 56.8, 40.0, 47.1, 31.5, 38.7, 32…
## $ `1821`  <dbl> 30.5, 38.9, 56.6, 40.0, 56.6, 40.0, 46.8, 31.5, 38.7, 32…
## $ `1822`  <dbl> 30.5, 38.9, 56.7, 40.0, 56.4, 40.0, 46.5, 31.5, 38.7, 32…
## $ `1823`  <dbl> 30.5, 38.9, 56.8, 40.0, 56.1, 40.0, 46.2, 31.5, 38.7, 32…
## $ `1824`  <dbl> 30.5, 38.9, 56.9, 40.0, 55.9, 40.0, 45.8, 31.5, 38.7, 31…
## $ `1825`  <dbl> 30.5, 38.9, 57.0, 40.0, 55.7, 40.0, 45.6, 31.5, 38.7, 31…
## $ `1826`  <dbl> 30.5, 38.9, 57.2, 40.0, 55.4, 40.0, 45.2, 31.5, 38.7, 31…
## $ `1827`  <dbl> 30.5, 38.9, 57.4, 40.0, 55.1, 40.0, 44.8, 31.5, 38.7, 31…
## $ `1828`  <dbl> 30.5, 38.9, 57.5, 40.0, 54.7, 40.0, 44.3, 31.5, 38.7, 30…
## $ `1829`  <dbl> 30.5, 38.9, 57.7, 40.0, 54.4, 40.0, 43.9, 31.5, 38.7, 30…
## $ `1830`  <dbl> 30.5, 38.9, 57.9, 40.0, 54.1, 40.0, 43.4, 31.5, 38.7, 29…
## $ `1831`  <dbl> 30.5, 38.9, 58.1, 40.0, 53.7, 40.0, 42.9, 31.5, 38.7, 29…
## $ `1832`  <dbl> 30.5, 38.9, 58.2, 40.0, 53.4, 40.0, 42.5, 31.5, 38.7, 29…
## $ `1833`  <dbl> 30.5, 38.9, 58.4, 40.0, 53.1, 40.0, 42.0, 31.5, 38.7, 28…
## $ `1834`  <dbl> 30.5, 38.9, 58.6, 40.0, 52.7, 40.0, 41.6, 31.5, 38.7, 28…
## $ `1835`  <dbl> 30.5, 38.9, 58.8, 40.0, 52.4, 40.0, 41.1, 31.5, 38.7, 28…
## $ `1836`  <dbl> 30.5, 38.9, 58.9, 40.0, 52.1, 40.0, 40.7, 31.5, 38.7, 27…
## $ `1837`  <dbl> 30.5, 38.9, 59.1, 40.0, 51.7, 40.0, 40.2, 31.5, 38.7, 27…
## $ `1838`  <dbl> 30.5, 38.9, 59.3, 40.0, 51.4, 40.0, 39.8, 31.5, 38.7, 27…
## $ `1839`  <dbl> 30.5, 38.9, 59.5, 40.0, 51.1, 40.0, 39.3, 31.5, 38.7, 26…
## $ `1840`  <dbl> 30.5, 38.9, 59.6, 40.0, 50.7, 40.0, 38.9, 31.5, 38.7, 26…
## $ `1841`  <dbl> 30.5, 38.9, 59.8, 40.0, 50.4, 40.0, 38.4, 31.5, 38.7, 26…
## $ `1842`  <dbl> 30.5, 38.9, 60.0, 40.0, 50.1, 40.0, 38.0, 31.5, 38.7, 25…
## $ `1843`  <dbl> 30.5, 38.9, 60.2, 40.0, 49.7, 40.0, 37.5, 31.5, 38.7, 25…
## $ `1844`  <dbl> 30.6, 38.9, 60.3, 40.0, 49.4, 40.0, 37.1, 31.5, 38.7, 24…
## $ `1845`  <dbl> 30.7, 38.9, 60.4, 40.0, 49.1, 40.0, 36.7, 31.7, 38.7, 24…
## $ `1846`  <dbl> 30.9, 38.9, 60.5, 40.0, 48.7, 40.0, 36.5, 31.9, 38.8, 24…
## $ `1847`  <dbl> 31.2, 38.9, 60.4, 40.0, 48.4, 40.0, 36.3, 32.3, 38.8, 24…
## $ `1848`  <dbl> 31.5, 38.9, 60.3, 40.0, 48.1, 40.0, 36.2, 32.7, 38.9, 23…
## $ `1849`  <dbl> 31.9, 38.9, 60.1, 40.0, 47.7, 40.0, 36.3, 33.3, 39.0, 23…
## $ `1850`  <dbl> 32.4, 38.9, 59.8, 40.0, 47.4, 40.0, 36.4, 34.0, 39.1, 23…
## $ `1851`  <dbl> 33.0, 38.9, 59.4, 40.0, 47.1, 40.0, 36.7, 34.8, 39.3, 23…
## $ `1852`  <dbl> 33.6, 38.9, 59.0, 40.0, 46.7, 40.0, 37.0, 35.8, 39.5, 23…
## $ `1853`  <dbl> 34.3, 38.9, 58.5, 40.0, 46.4, 40.0, 37.5, 36.8, 39.6, 22…
## $ `1854`  <dbl> 35.0, 38.9, 57.9, 40.0, 46.1, 40.0, 38.0, 38.0, 39.8, 22…
## $ `1855`  <dbl> 35.8, 38.9, 57.2, 40.0, 45.7, 40.0, 38.6, 39.3, 40.1, 22…
## $ `1856`  <dbl> 36.7, 38.9, 56.5, 40.0, 45.4, 40.0, 39.4, 40.7, 40.3, 22…
## $ `1857`  <dbl> 37.7, 38.9, 55.7, 40.0, 45.1, 40.0, 40.2, 42.3, 40.6, 22…
## $ `1858`  <dbl> 38.6, 38.9, 54.8, 40.0, 44.7, 40.0, 41.1, 43.9, 40.9, 22…
## $ `1859`  <dbl> 39.6, 38.9, 53.9, 40.0, 44.4, 40.0, 42.1, 45.5, 41.2, 22…
## $ `1860`  <dbl> 40.5, 38.9, 53.0, 40.0, 44.1, 40.0, 43.0, 47.2, 41.5, 22…
## $ `1861`  <dbl> 41.5, 38.9, 52.1, 40.0, 43.7, 40.0, 44.0, 48.8, 41.8, 22…
## $ `1862`  <dbl> 42.4, 38.9, 51.2, 40.0, 43.4, 40.0, 44.9, 50.5, 42.1, 22…
## $ `1863`  <dbl> 43.4, 38.9, 50.3, 40.0, 43.1, 40.0, 45.8, 52.1, 42.4, 22…
## $ `1864`  <dbl> 44.3, 38.9, 49.4, 40.0, 42.7, 40.0, 46.8, 53.8, 42.6, 22…
## $ `1865`  <dbl> 45.0, 38.9, 48.5, 40.0, 42.4, 40.0, 47.6, 55.2, 42.9, 22…
## $ `1866`  <dbl> 45.7, 38.8, 47.7, 40.0, 42.1, 40.0, 48.4, 56.4, 43.1, 22…
## $ `1867`  <dbl> 46.2, 38.7, 46.9, 40.0, 41.7, 40.0, 49.0, 57.4, 43.2, 22…
## $ `1868`  <dbl> 46.7, 38.5, 46.1, 40.0, 41.4, 40.0, 49.6, 58.2, 43.3, 22…
## $ `1869`  <dbl> 47.0, 38.3, 45.3, 40.0, 41.1, 40.0, 50.0, 58.7, 43.4, 22…
## $ `1870`  <dbl> 47.2, 38.0, 44.5, 40.0, 40.7, 40.0, 50.4, 59.0, 43.4, 22…
## $ `1871`  <dbl> 47.3, 37.7, 43.8, 40.0, 40.4, 40.0, 50.7, 59.1, 43.3, 22…
## $ `1872`  <dbl> 47.2, 37.4, 43.1, 40.0, 40.1, 40.0, 50.9, 59.0, 43.2, 22…
## $ `1873`  <dbl> 47.1, 37.0, 42.4, 40.0, 39.7, 40.0, 51.0, 58.7, 43.1, 22…
## $ `1874`  <dbl> 46.8, 36.6, 41.8, 40.0, 39.4, 40.0, 50.9, 58.1, 42.9, 22…
## $ `1875`  <dbl> 46.4, 36.1, 41.1, 40.0, 39.1, 40.0, 50.8, 57.3, 42.7, 22…
## $ `1876`  <dbl> 45.9, 35.6, 40.5, 40.0, 38.7, 40.0, 50.7, 56.3, 42.4, 22…
## $ `1877`  <dbl> 45.3, 35.0, 39.9, 40.0, 38.4, 40.0, 50.4, 55.1, 42.0, 22…
## $ `1878`  <dbl> 44.7, 34.4, 39.4, 40.0, 38.1, 40.0, 50.0, 53.6, 41.7, 22…
## $ `1879`  <dbl> 44.0, 33.8, 38.8, 40.0, 37.7, 40.0, 49.6, 52.2, 41.3, 22…
## $ `1880`  <dbl> 43.4, 33.2, 38.2, 40.0, 37.4, 40.0, 49.2, 50.7, 40.9, 22…
## $ `1881`  <dbl> 42.7, 32.6, 37.7, 40.0, 37.1, 40.0, 48.8, 49.3, 40.5, 22…
## $ `1882`  <dbl> 42.1, 31.9, 37.1, 40.0, 36.7, 40.0, 48.4, 47.8, 40.1, 22…
## $ `1883`  <dbl> 41.4, 31.3, 36.6, 40.0, 36.4, 40.0, 48.1, 46.4, 39.7, 22…
## $ `1884`  <dbl> 40.8, 30.7, 36.0, 40.0, 36.1, 40.0, 47.7, 44.9, 39.4, 22…
## $ `1885`  <dbl> 40.2, 30.2, 35.5, 40.0, 35.8, 40.0, 47.3, 43.6, 39.0, 22…
## $ `1886`  <dbl> 39.7, 29.6, 35.0, 40.0, 35.6, 40.0, 47.0, 42.4, 38.7, 22…
## $ `1887`  <dbl> 39.2, 29.2, 34.6, 40.0, 35.5, 40.0, 46.8, 41.3, 38.4, 22…
## $ `1888`  <dbl> 38.8, 28.7, 34.3, 40.0, 35.5, 40.0, 46.6, 40.3, 38.1, 22…
## $ `1889`  <dbl> 38.5, 28.3, 34.0, 40.0, 35.5, 40.0, 46.5, 39.5, 37.9, 23…
## $ `1890`  <dbl> 38.3, 28.0, 33.7, 40.0, 35.6, 40.0, 46.4, 38.7, 37.7, 23…
## $ `1891`  <dbl> 38.1, 27.6, 33.5, 40.0, 35.8, 40.0, 46.4, 38.1, 37.6, 23…
## $ `1892`  <dbl> 38.0, 27.4, 33.3, 40.0, 36.0, 40.0, 46.4, 37.6, 37.4, 23…
## $ `1893`  <dbl> 38.0, 27.1, 33.2, 40.0, 36.4, 40.0, 46.4, 37.2, 37.3, 23…
## $ `1894`  <dbl> 38.0, 26.9, 33.2, 40.0, 36.8, 40.0, 46.5, 37.0, 37.3, 23…
## $ `1895`  <dbl> 38.1, 26.8, 33.2, 40.0, 37.2, 40.0, 46.7, 36.8, 37.3, 23…
## $ `1896`  <dbl> 38.2, 26.7, 33.3, 40.0, 37.8, 40.0, 46.9, 36.8, 37.3, 23…
## $ `1897`  <dbl> 38.5, 26.6, 33.4, 40.0, 38.4, 40.0, 47.1, 36.9, 37.3, 23…
## $ `1898`  <dbl> 38.8, 26.6, 33.5, 40.0, 39.1, 40.0, 47.5, 37.1, 37.4, 23…
## $ `1899`  <dbl> 39.1, 26.5, 33.7, 40.0, 39.8, 40.0, 47.8, 37.4, 37.4, 23…
## $ `1900`  <dbl> 39.4, 26.5, 33.8, 40.0, 40.5, 40.0, 48.1, 37.6, 37.5, 23…
## $ `1901`  <dbl> 39.7, 26.5, 34.0, 40.0, 41.2, 40.0, 48.4, 37.8, 37.5, 23…
## $ `1902`  <dbl> 39.9, 26.4, 34.2, 40.0, 41.9, 40.0, 48.7, 38.0, 37.6, 23…
## $ `1903`  <dbl> 40.2, 26.4, 34.3, 40.0, 42.6, 40.0, 49.0, 38.3, 37.7, 23…
## $ `1904`  <dbl> 40.5, 26.4, 34.5, 40.0, 43.3, 40.0, 49.3, 38.5, 37.7, 23…
## $ `1905`  <dbl> 40.8, 26.3, 34.6, 40.0, 44.0, 40.0, 49.5, 38.7, 37.8, 23…
## $ `1906`  <dbl> 41.0, 26.3, 34.8, 40.0, 44.7, 40.0, 49.8, 38.9, 37.8, 23…
## $ `1907`  <dbl> 41.2, 26.3, 35.0, 40.0, 45.4, 40.0, 49.9, 39.1, 37.8, 23…
## $ `1908`  <dbl> 41.3, 26.3, 35.2, 40.0, 46.1, 40.0, 50.1, 39.2, 37.8, 23…
## $ `1909`  <dbl> 41.5, 26.3, 35.3, 40.0, 46.8, 40.0, 50.2, 39.4, 37.8, 23…
## $ `1910`  <dbl> 41.5, 26.2, 35.5, 40.0, 47.5, 40.0, 50.2, 39.5, 37.7, 23…
## $ `1911`  <dbl> 41.5, 26.2, 35.7, 40.0, 48.3, 40.0, 50.2, 39.6, 37.6, 23…
## $ `1912`  <dbl> 41.5, 26.2, 35.9, 40.0, 49.0, 40.0, 50.2, 39.7, 37.5, 23…
## $ `1913`  <dbl> 41.5, 26.2, 36.0, 40.0, 49.7, 40.0, 50.2, 39.8, 37.4, 23…
## $ `1914`  <dbl> 41.4, 26.2, 36.2, 40.0, 50.4, 40.0, 50.0, 39.9, 37.3, 23…
## $ `1915`  <dbl> 41.3, 26.2, 36.4, 40.0, 51.2, 40.0, 49.9, 40.0, 37.1, 23…
## $ `1916`  <dbl> 41.1, 26.2, 36.6, 40.0, 51.9, 40.0, 49.7, 40.0, 36.7, 23…
## $ `1917`  <dbl> 40.9, 26.2, 36.8, 40.0, 52.7, 40.0, 49.5, 40.1, 36.6, 23…
## $ `1918`  <dbl> 40.6, 26.2, 37.0, 40.0, 53.4, 40.0, 49.2, 40.1, 36.4, 23…
## $ `1919`  <dbl> 40.4, 26.2, 37.2, 40.0, 54.2, 40.0, 48.9, 40.1, 36.2, 23…
## $ `1920`  <dbl> 40.2, 26.3, 37.4, 40.0, 54.9, 40.0, 48.6, 40.1, 35.9, 23…
## $ `1921`  <dbl> 39.9, 26.3, 37.6, 40.0, 55.7, 40.0, 48.3, 40.1, 35.8, 23…
## $ `1922`  <dbl> 39.7, 26.3, 37.8, 40.0, 56.4, 40.0, 48.1, 40.2, 35.8, 23…
## $ `1923`  <dbl> 39.4, 26.3, 38.0, 40.0, 57.2, 40.0, 47.8, 40.2, 35.5, 23…
## $ `1924`  <dbl> 39.2, 26.3, 38.2, 40.0, 57.9, 40.0, 47.5, 40.2, 35.0, 23…
## $ `1925`  <dbl> 39.0, 26.3, 38.3, 40.0, 58.5, 40.0, 47.2, 40.2, 34.4, 23…
## $ `1926`  <dbl> 38.7, 26.3, 38.5, 40.0, 59.0, 40.0, 47.0, 40.3, 33.8, 23…
## $ `1927`  <dbl> 38.5, 26.3, 38.7, 40.0, 59.5, 40.0, 46.6, 40.3, 33.5, 23…
## $ `1928`  <dbl> 38.3, 26.3, 38.8, 40.0, 59.9, 40.0, 46.3, 40.3, 33.2, 23…
## $ `1929`  <dbl> 38.0, 26.3, 38.9, 40.0, 60.2, 40.0, 46.1, 40.3, 33.0, 23…
## $ `1930`  <dbl> 37.8, 26.4, 39.0, 40.0, 60.4, 40.0, 46.0, 40.3, 33.1, 23…
## $ `1931`  <dbl> 37.6, 26.4, 39.1, 40.0, 60.6, 40.0, 46.0, 40.4, 32.7, 23…
## $ `1932`  <dbl> 37.4, 26.4, 39.2, 40.0, 60.7, 40.0, 46.1, 40.4, 32.4, 23…
## $ `1933`  <dbl> 37.1, 26.4, 39.3, 40.0, 60.7, 40.0, 46.1, 40.4, 32.3, 23…
## $ `1934`  <dbl> 36.9, 26.4, 39.3, 40.0, 60.7, 40.0, 46.2, 40.4, 32.1, 23…
## $ `1935`  <dbl> 36.7, 26.4, 39.4, 40.0, 60.7, 40.0, 46.0, 40.4, 31.3, 23…
## $ `1936`  <dbl> 36.5, 26.4, 39.4, 40.0, 60.4, 40.0, 46.2, 40.5, 31.5, 23…
## $ `1937`  <dbl> 36.3, 26.4, 39.4, 40.0, 60.1, 40.0, 46.7, 40.5, 31.9, 23…
## $ `1938`  <dbl> 36.1, 26.4, 39.4, 40.0, 59.9, 40.0, 47.3, 40.5, 32.2, 23…
## $ `1939`  <dbl> 35.9, 26.4, 39.4, 40.0, 59.6, 40.0, 48.5, 40.5, 32.3, 23…
## $ `1940`  <dbl> 35.7, 26.5, 39.4, 40.0, 59.4, 40.0, 49.5, 40.6, 31.9, 23…
## $ `1941`  <dbl> 35.5, 26.5, 39.5, 40.0, 59.1, 40.0, 50.1, 40.6, 31.2, 23…
## $ `1942`  <dbl> 35.2, 26.5, 39.5, 40.0, 58.9, 40.0, 50.4, 40.6, 30.7, 23…
## $ `1943`  <dbl> 35.0, 26.5, 39.5, 40.0, 58.6, 40.0, 50.9, 40.6, 30.9, 23…
## $ `1944`  <dbl> 34.8, 26.5, 39.5, 40.0, 58.4, 40.0, 51.3, 40.6, 31.1, 23…
## $ `1945`  <dbl> 34.6, 26.5, 39.5, 40.0, 58.1, 40.0, 51.1, 40.7, 31.2, 23…
## $ `1946`  <dbl> 34.4, 26.5, 39.5, 40.0, 57.9, 40.0, 51.0, 40.7, 32.4, 23…
## $ `1947`  <dbl> 34.3, 26.5, 39.5, 40.0, 57.6, 40.0, 50.3, 40.7, 32.7, 23…
## $ `1948`  <dbl> 34.2, 26.5, 39.6, 40.0, 57.4, 40.0, 49.2, 40.7, 33.1, 23…
## $ `1949`  <dbl> 34.1, 26.5, 39.6, 40.0, 57.2, 40.0, 47.7, 40.7, 33.4, 23…
## $ `1950`  <dbl> 34.0, 26.6, 39.6, 40.0, 57.0, 40.0, 46.4, 40.8, 35.1, 24…
## $ `1951`  <dbl> 34.0, 26.6, 39.6, 40.0, 56.8, 40.0, 45.7, 40.8, 35.8, 24…
## $ `1952`  <dbl> 34.0, 26.6, 39.6, 40.0, 56.6, 40.0, 44.7, 40.8, 36.2, 24…
## $ `1953`  <dbl> 34.1, 26.6, 39.6, 40.0, 56.5, 40.0, 43.6, 40.8, 36.5, 25…
## $ `1954`  <dbl> 34.3, 26.6, 39.7, 40.0, 56.4, 40.0, 42.9, 40.9, 36.5, 25…
## $ `1955`  <dbl> 34.4, 26.6, 39.7, 40.0, 56.3, 40.0, 41.9, 40.9, 35.4, 26…
## $ `1956`  <dbl> 34.5, 26.6, 39.7, 40.0, 56.2, 40.0, 42.0, 40.9, 35.2, 27…
## $ `1957`  <dbl> 34.6, 26.6, 39.7, 40.0, 56.1, 40.0, 42.2, 40.9, 35.2, 27…
## $ `1958`  <dbl> 34.6, 26.6, 39.7, 40.0, 56.0, 40.0, 42.4, 40.9, 35.2, 28…
## $ `1959`  <dbl> 34.5, 26.6, 39.7, 40.0, 55.9, 40.0, 41.9, 41.0, 35.3, 28…
## $ `1960`  <dbl> 34.4, 26.7, 39.7, 40.0, 55.8, 40.0, 41.4, 41.0, 35.2, 29…
## $ `1961`  <dbl> 34.1, 26.7, 39.8, 40.0, 55.7, 40.0, 41.4, 41.0, 35.3, 29…
## $ `1962`  <dbl> 33.7, 26.7, 39.8, 40.0, 55.6, 40.0, 41.5, 41.0, 35.4, 29…
## $ `1963`  <dbl> 33.2, 26.7, 39.8, 40.0, 55.5, 40.0, 40.7, 41.1, 35.2, 29…
## $ `1964`  <dbl> 32.7, 26.7, 39.8, 40.0, 55.4, 40.0, 40.0, 41.1, 35.1, 29…
## $ `1965`  <dbl> 32.2, 26.7, 39.8, 40.0, 55.3, 40.0, 39.4, 41.1, 35.1, 29…
## $ `1966`  <dbl> 31.7, 26.7, 39.8, 40.0, 55.2, 40.0, 38.8, 41.1, 34.8, 29…
## $ `1967`  <dbl> 31.2, 26.7, 39.9, 40.0, 55.2, 40.0, 38.2, 41.1, 34.7, 29…
## $ `1968`  <dbl> 30.8, 26.7, 39.9, 40.0, 55.1, 40.0, 37.6, 41.2, 34.5, 29…
## $ `1969`  <dbl> 30.5, 26.7, 39.9, 40.0, 54.9, 40.0, 36.7, 41.2, 34.2, 29…
## $ `1970`  <dbl> 30.5, 26.8, 39.9, 40.0, 54.8, 40.0, 36.1, 41.2, 33.8, 29…
## $ `1971`  <dbl> 30.8, 26.8, 39.9, 40.0, 54.7, 40.0, 35.6, 41.2, 33.6, 29…
## $ `1972`  <dbl> 31.2, 26.8, 39.9, 40.0, 54.6, 40.0, 35.3, 41.3, 33.4, 28…
## $ `1973`  <dbl> 31.6, 26.8, 40.0, 40.0, 54.5, 40.0, 35.2, 41.3, 33.3, 28…
## $ `1974`  <dbl> 31.9, 26.8, 40.0, 40.0, 54.4, 40.0, 35.5, 41.3, 33.2, 27…
## $ `1975`  <dbl> 32.1, 26.8, 40.0, 40.0, 54.4, 40.0, 36.1, 41.3, 33.1, 27…
## $ `1976`  <dbl> 32.2, 26.8, 40.0, 40.0, 54.3, 40.0, 36.9, 41.3, 33.1, 26…
## $ `1977`  <dbl> 32.2, 26.8, 40.0, 40.0, 54.2, 40.0, 37.9, 41.4, 33.0, 26…
## $ `1978`  <dbl> 32.2, 26.8, 40.0, 40.0, 54.1, 40.0, 38.8, 41.4, 32.8, 25…
## $ `1979`  <dbl> 32.2, 26.8, 40.1, 40.0, 54.0, 40.0, 39.7, 41.4, 32.5, 25…
## $ `1980`  <dbl> 32.2, 26.9, 40.1, 40.0, 53.9, 40.0, 40.4, 41.4, 32.2, 24…
## $ `1981`  <dbl> 32.1, 26.9, 40.1, 40.0, 53.8, 40.0, 41.0, 41.5, 32.0, 24…
## $ `1982`  <dbl> 32.0, 26.9, 40.1, 40.0, 53.7, 40.0, 41.5, 41.5, 31.9, 24…
## $ `1983`  <dbl> 32.1, 26.9, 40.1, 40.0, 53.6, 40.0, 41.8, 41.5, 31.9, 24…
## $ `1984`  <dbl> 32.5, 26.9, 40.1, 40.0, 53.5, 40.0, 42.1, 41.5, 32.2, 25…
## $ `1985`  <dbl> 33.0, 26.9, 40.1, 40.0, 53.4, 40.0, 42.9, 41.5, 32.4, 26…
## $ `1986`  <dbl> 33.7, 26.9, 40.2, 40.0, 53.3, 40.0, 43.7, 41.6, 32.6, 26…
## $ `1987`  <dbl> 34.7, 26.9, 40.0, 40.0, 53.2, 40.0, 44.5, 41.6, 32.8, 27…
## $ `1988`  <dbl> 35.4, 26.9, 39.8, 40.0, 53.1, 40.0, 45.3, 41.6, 32.9, 28…
## $ `1989`  <dbl> 36.0, 26.9, 39.4, 40.0, 53.0, 40.0, 46.1, 41.7, 33.0, 28…
## $ `1990`  <dbl> 36.4, 27.0, 38.8, 40.0, 52.9, 40.0, 46.1, 41.9, 33.0, 28…
## $ `1991`  <dbl> 36.7, 27.0, 38.1, 40.0, 52.8, 40.0, 45.9, 42.2, 33.0, 28…
## $ `1992`  <dbl> 36.7, 27.0, 37.4, 40.0, 52.8, 40.0, 45.9, 42.6, 32.9, 27…
## $ `1993`  <dbl> 36.8, 27.0, 36.7, 40.0, 52.7, 40.0, 46.4, 43.0, 32.8, 27…
## $ `1994`  <dbl> 36.8, 27.0, 36.1, 40.0, 52.6, 40.0, 46.9, 43.5, 32.7, 27…
## $ `1995`  <dbl> 36.8, 27.2, 35.5, 40.0, 52.5, 40.0, 47.7, 43.3, 32.7, 27…
## $ `1996`  <dbl> 36.8, 27.5, 34.9, 40.0, 52.4, 40.0, 48.8, 42.5, 32.8, 27…
## $ `1997`  <dbl> 36.8, 28.0, 34.4, 40.0, 52.3, 40.0, 49.6, 41.0, 32.9, 27…
## $ `1998`  <dbl> 36.8, 28.6, 34.0, 40.0, 52.2, 40.0, 50.0, 39.4, 33.0, 27…
## $ `1999`  <dbl> 36.8, 29.4, 33.5, 40.0, 52.1, 40.0, 50.8, 37.6, 33.2, 28…
## $ `2000`  <dbl> 36.8, 30.2, 33.1, 40.0, 51.8, 40.0, 51.7, 36.2, 33.3, 28…
## $ `2001`  <dbl> 36.8, 30.7, 32.6, 40.0, 51.3, 40.0, 51.7, 35.0, 33.4, 29…
## $ `2002`  <dbl> 36.8, 31.0, 32.2, 40.0, 50.6, 40.0, 51.4, 35.3, 33.5, 29…
## $ `2003`  <dbl> 36.8, 31.1, 31.7, 40.0, 49.7, 40.0, 50.8, 35.3, 33.7, 29…
## $ `2004`  <dbl> 36.8, 31.0, 31.2, 40.0, 48.5, 40.0, 49.4, 34.2, 34.0, 29…
## $ `2005`  <dbl> 36.8, 30.7, 30.8, 40.0, 47.3, 40.0, 47.9, 33.5, 34.3, 29…
## $ `2006`  <dbl> 36.8, 30.4, 30.3, 40.0, 46.2, 40.0, 46.7, 32.7, 34.7, 29…
## $ `2007`  <dbl> 36.8, 30.2, 29.9, 40.0, 45.0, 40.0, 45.8, 30.8, 34.9, 30…
## $ `2008`  <dbl> 36.8, 30.0, 29.4, 40.0, 44.1, 40.0, 44.9, 29.6, 35.0, 30…
## $ `2009`  <dbl> 36.8, 29.7, 29.0, 40.0, 43.4, 40.0, 44.0, 29.6, 34.2, 30…
## $ `2010`  <dbl> 36.8, 29.5, 28.5, 40.0, 42.9, 40.0, 43.0, 29.2, 33.6, 30…
## $ `2011`  <dbl> 36.8, 29.3, 28.2, 40.0, 42.7, 40.0, 42.3, 29.5, 32.9, 30…
## $ `2012`  <dbl> 36.8, 29.1, 27.9, 40.0, 42.6, 40.0, 41.8, 30.2, 32.4, 30…
## $ `2013`  <dbl> 36.8, 29.0, 27.7, 40.0, 42.6, 40.0, 41.6, 30.7, 31.9, 30…
## $ `2014`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 41.6, 31.3, 32.2, 30…
## $ `2015`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 41.8, 31.9, 32.3, 30…
## $ `2016`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.1, 32.3, 32.3, 30…
## $ `2017`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.3, 32.5, 32.3, 30…
## $ `2018`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2019`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2020`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2021`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2022`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2023`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2024`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2025`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2026`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2027`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2028`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2029`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2030`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2031`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2032`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2033`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2034`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2035`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2036`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2037`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2038`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2039`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
## $ `2040`  <dbl> 36.8, 29.0, 27.6, 40.0, 42.6, 40.0, 42.4, 32.6, 32.3, 30…
# Reshape the wide Gini table to one row per country-year and attach the
# happiness measures. inner_join() keeps only country-years present in both.
income_inequality_clean <- income_inequality %>%
  gather(key = "year", value = "inequality", "1880":"2020") %>%
  # gather() produces character years. Convert BEFORE filtering so that
  # `year > 2005` is a numeric comparison rather than a string comparison
  # that only happens to work for four-digit years.
  mutate(year = as.double(year)) %>%
  filter(year > 2005) %>%
  select(country, year, inequality) %>%
  inner_join(
    happiness_clean,
    by = c("country" = "country_name", "year" = "year")
  )

 #glimpse(income_inequality_clean)

# Full data set: add both PPP measures by country name and year, then the
# region / income group metadata by country code.
# Both PPP tables carry a country_code column, so the joins suffix them
# .x / .y; the .x copy is dropped and the .y copy is the metadata key.
# NOTE(review): matching on country *names* drops any country spelled
# differently across sources - confirm the loss is acceptable.
full_data <- income_inequality_clean %>%
  inner_join(PPP_clean, by = c("country" = "country_name", "year" = "year")) %>%
  inner_join(PPPP_clean, by = c("country" = "country_name", "year" = "year")) %>%
  select(-country_code.x) %>%
  inner_join(counrty_region_clean, by = c("country_code.y" = "country_code"))

glimpse(full_data)
## Observations: 1,403
## Variables: 19
## $ country                           <chr> "Armenia", "Austria", "Azerbai…
## $ year                              <dbl> 2006, 2006, 2006, 2006, 2006, …
## $ inequality                        <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, …
## $ life_ladder                       <dbl> 4.289311, 7.122211, 4.727871, …
## $ log_gdp_per_capita                <dbl> 8.717719, 10.657212, 9.279043,…
## $ social_support                    <dbl> 0.6818768, 0.9363504, 0.854414…
## $ healthy_life_expectancy_at_birth  <dbl> 64.80, 70.76, 61.88, 59.02, 61…
## $ freedom_to_make_life_choices      <dbl> 0.5201978, 0.9413823, 0.771528…
## $ generosity                        <dbl> -0.216674119, 0.300667107, -0.…
## $ perceptions_of_corruption         <dbl> 0.8495131, 0.4901112, 0.774117…
## $ positive_affect                   <dbl> 0.4941210, 0.8231047, 0.511687…
## $ negative_affect                   <dbl> 0.4694188, 0.1738117, 0.275695…
## $ confidence_in_national_government <dbl> 0.3443375, 0.4970378, 0.754706…
## $ democratic_quality                <dbl> -0.50248164, 1.22430921, -1.18…
## $ ppp                               <dbl> 1.600727e+02, 8.609180e-01, 2.…
## $ country_code.y                    <chr> "ARM", "AUT", "AZE", "BGD", "B…
## $ pppp                              <dbl> 149.7049751, 0.8785690, 0.2253…
## $ region                            <chr> "Europe & Central Asia", "Euro…
## $ income_group                      <chr> "Upper middle income", "High i…
# ---- Import additional data sets ----
# Suicide rates, income per person, and one World Happiness file per year
# (2015-2019), all read from data_raw/.

suicide <- read_csv(here::here("data_raw", "newdata3.csv"))
income_per_person <- read_csv(here::here("data_raw", "income_per_person.csv"))

WH_2019 <- read_csv(here::here("data_raw", "WH_2019.csv"))
WH_2018 <- read_csv(here::here("data_raw", "WH_2018.csv"))
WH_2017 <- read_csv(here::here("data_raw", "WH_2017.csv"))
WH_2016 <- read_csv(here::here("data_raw", "WH_2016.csv"))
WH_2015 <- read_csv(here::here("data_raw", "WH_2015.csv"))

#glimpse(WH_2019)

# ---- Clean the 2019 World Happiness file ----
# Align its column names with the names used in full_data, drop the rank
# column, and stamp every row with year 2019.
# NOTE(review): gdp_per_capita is renamed to log_gdp_per_capita here without
# any transformation - confirm the 2019 column really is on the log scale.
WH2019 <- WH_2019 %>%
  janitor::clean_names() %>%
  select(-overall_rank) %>%
  rename(
    country = country_or_region,
    life_ladder = score,
    log_gdp_per_capita = gdp_per_capita,
    healthy_life_expectancy_at_birth = healthy_life_expectancy
  ) %>%
  mutate(year = 2019)
#glimpse(WH2019)

# ---- Add the 2019 World Happiness rows to the combined data ----
# The join keys are spelled out instead of relying on full_join()'s implicit
# default (every column the two tables have in common), so the choice is
# visible and no "Joining, by = ..." message is needed to discover it.
# NOTE(review): because the keys include the measurement columns, rows only
# merge when every shared value is exactly equal; in practice this most
# likely just appends the 2019 rows. Confirm that is the intent rather than
# a join on country/year.
adjusted_full <- full_data %>%
  full_join(WH2019, by = intersect(names(full_data), names(WH2019)))

#glimpse(adjusted_full)

# ---- Income per person, 2015-2019 ----
# Stack the per-year columns into (country, year, income_per_person) rows and
# keep only the five most recent years.
income <- income_per_person %>%
  gather(key = "year", value = "income_per_person", -country) %>%
  mutate(year = as.double(year)) %>%
  filter(year %in% 2015:2019)

#glimpse(income)

# Full join onto the adjusted data by country and year, so country-years that
# appear in only one of the two tables are kept (with NAs elsewhere).
WH_Income <- full_join(adjusted_full, income, by = c("country", "year"))

#glimpse(WH_Income)

 
# ---- Suicide rates ----
# Reduce the suicide table to country, year and suicide_rate, keeping only the
# combined "Both sexes" rows.
suicide_rates <- suicide %>%
  gather(key = "year", value = "suicide_rate", -c(country, sex)) %>%
  mutate(year = as.double(year)) %>%
  filter(sex == "Both sexes") %>%
  select(country, year, suicide_rate)
#glimpse(suicide_rates)

# Attach the suicide rates to the happiness + income data (full join, so
# unmatched country-years from either side are retained).
final_data <- full_join(WH_Income, suicide_rates, by = c("country", "year"))


# Shorten the long variable names for use in plots and tables.
final_data <- rename(
  final_data,
  lifeExpAtBirth = healthy_life_expectancy_at_birth,
  freedom = freedom_to_make_life_choices,
  trustInGov = confidence_in_national_government,
  demoQuality = democratic_quality,
  incomePperson = income_per_person,
  suicideR = suicide_rate,
  perceptionCorrupt = perceptions_of_corruption
)

# 2019 happiness score per country, keyed by `region` so it can be joined to
# the world-map polygons. "United States" is relabelled "USA" to match the
# name the map join expects.
#
# This replaces a spread() -> rename() -> gather() round trip whose only
# effect was that relabelling. The round trip depended on the alphabetical
# column range Afghanistan:Zimbabwe and failed outright if "United States"
# was missing from the data.
#
# NOTE(review): this data frame shares its name with ggplot2::map_data().
# Calls like map_data("world") still resolve to the function, but the name is
# kept only because later code joins on it; consider renaming both together.
map_data <- final_data %>%
  filter(year == 2019) %>%
  transmute(
    region = if_else(country == "United States", "USA", country),
    life_ladder
  )


glimpse(final_data)
## Observations: 2,421
## Variables: 21
## $ country            <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year               <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality         <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder        <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support     <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth     <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom            <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity         <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt  <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect    <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect    <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov         <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality        <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp                <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y     <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp               <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region             <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group       <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…

Introduction

The World Happiness Report is a landmark survey of the state of global happiness. The first report was published in 2012, and the report continues to gain global recognition as governments and organizations increasingly use happiness indicators to improve conditions in their countries. Measurements of well-being can be used effectively to assess the progress of nations.

In this report we look into the pursuit of happiness. This ‘pursuit’ has been a vital part of life as we know it. But what does happiness really look like across the globe? The pursuit of happiness may look different from person to person, but are there similarities amongst nations? Can regions increase their happiness score by focusing on specific factors of life?

Research Question

What economic and social factors have the most significant impact on the overall level of happiness across the globe?

Data Sources

In our research we use four specific sources in order to reflect the most up-to-date and trusted variables related to happiness.

Source 1: The World Happiness Report

worldhappiness.report.

This data provided original statistics on over 150 countries around the world. The variables included are appended at the end of this report. The happiness scores were developed from the Gallup World Poll. People answered a main life evaluation question that was asked as a Cantril Ladder. Each person was asked on a scale of 0 (worst life) to 10 (best life), how they would rate their living situation right now. Gallup then used weights to make the estimates representative of the country as a whole.

Source 2: WHO Suicide Statistic

kaggle.com who.int

Information in this set was retrieved from the Kaggle dataset, which was taken directly from the World Health Organization (W.H.O.). This information was gathered by the Global Health Observatory data repository. As more recent and revised data is released, the data is updated.

Source 3: Gapminder World Data

gapminder.org

The data source provides information such as country name, year, Gross Domestic Product (GDP), and Income per person for that country. Gapminder is an independent foundation that focuses on the collection of world data. The data is collected in collaboration with universities, The UN, public agencies, and government agencies around the world.

Source 4: World Bank

worldbank.org

This source allowed us to find global statistics on Purchasing Power Parity (PPP). The data comes straight from the World Bank. The data set provides statistics on PPP for the years 1990 - 2018. These world development indicators are compiled from officially recognized international sources that represent the “most current and accurate global development data available”. It includes national, regional, and global estimates; therefore this data is valid and can be trusted.

# ---- World map data ----

#view(world)

# Country polygons (minus Antarctica) with the 2019 happiness score attached.
# ggplot2::map_data() is namespace-qualified because a data frame that is also
# called `map_data` exists in this script; that data frame is the join partner.
world <- ggplot2::map_data("world") %>%
  filter(region != "Antarctica") %>%
  left_join(map_data, by = "region")

glimpse(world)
## Observations: 94,680
## Variables: 7
## $ long        <dbl> -69.89912, -69.89571, -69.94219, -70.00415, -70.0661…
## $ lat         <dbl> 12.45200, 12.42300, 12.43853, 12.50049, 12.54697, 12…
## $ group       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2…
## $ order       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 1…
## $ region      <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba"…
## $ subregion   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ life_ladder <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.203, 3.203…
# Choropleth of the 2019 happiness score; countries with no score keep the
# default NA fill.
ggplot(world, aes(x = long, y = lat, group = group, fill = life_ladder)) +
  geom_polygon() +
  scale_fill_viridis(option = "plasma") +
  theme_void() +
  labs(
    fill = "happiness",
    title = "2019 World Happiness",
    caption = "source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme(
    plot.caption = element_text(face = "bold.italic", hjust = 1),
    plot.title = element_text(hjust = .5)
  )

Immediately we see that the world is heavily divided according to each country's happiness level. The regions portrayed in more lively colors, such as North America, Australia, and parts of Northern Europe, seem to have a much higher level of happiness than those in Central Africa. Countries colored in gray were not included in the data.

Take a look at the happiness level now divided by region.

# ---- Regional happiness ----
# Mean 2018 happiness score per region (rows without a region are excluded),
# with `region` turned into a factor ordered by that mean so the bars in the
# chart below are sorted.
summaryRegionHappiness <- final_data %>%
  filter(year == 2018, !is.na(region)) %>%
  group_by(region) %>%
  summarise(meanHappiness = mean(life_ladder)) %>%
  mutate(region = fct_reorder(region, meanHappiness))

summaryRegionHappiness
## # A tibble: 7 x 2
##   region                     meanHappiness
##   <fct>                              <dbl>
## 1 East Asia & Pacific                 5.75
## 2 Europe & Central Asia               6.27
## 3 Latin America & Caribbean           6.02
## 4 Middle East & North Africa          5.70
## 5 North America                       7.03
## 6 South Asia                          4.55
## 7 Sub-Saharan Africa                  4.51
# Horizontal bar chart of mean happiness per region.
#
# Two fixes relative to the previous version:
#  * Axis titles: labs(x = , y = ) name the *aesthetics*, not the screen axes.
#    With coord_flip() the x aesthetic (region) is drawn vertically, so `x`
#    must be titled "Region" and `y` "Mean happiness score"; they were swapped.
#  * Bar labels are computed from the data with geom_text() instead of seven
#    hand-typed annotate() calls, so they cannot drift out of sync with
#    summaryRegionHappiness.
ggplot(summaryRegionHappiness, aes(x = region, y = meanHappiness)) +
  geom_col(aes(fill = meanHappiness)) +
  # Value label just past the end of each bar (offset is in score units).
  geom_text(aes(label = sprintf("%.2f", meanHappiness)), nudge_y = 0.58) +
  coord_flip() +
  # No padding at zero; extra room on the right for the value labels.
  # NOTE(review): newer ggplot2 releases offer expansion() as the replacement
  # for expand_scale(); switch once the installed version is confirmed.
  scale_y_continuous(expand = expand_scale(mult = c(0, .5))) +
  labs(
    x = 'Region',
    y = 'Mean happiness score',
    title = 'Mean Happiness Score per Region',
    fill = 'Mean Happiness',
    caption = "source: https://worldhappiness.report/ed/2019/#read"
  ) +
  scale_fill_viridis(option = 'plasma') +
  theme_half_open() +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 0)
  )

This chart will be the base visualization for the remainder of the report. We have compared specific social and economic factors in order to see what motivates happiness in some regions compared to others. You can follow each region by its mean happiness score, denoted by the color of the bar, with yellow being the most happy and dark blue the least. This will help you better understand the influence and weight of the following factors.

# Overall correlation: keep the 2018 rows and only the numeric factors that
# should appear in the correlation matrix.
summaryFactors <- final_data %>%
  filter(year == 2018) %>%
  select(
    inequality, life_ladder, incomePperson, ppp, trustInGov,
    generosity, freedom, lifeExpAtBirth, social_support, perceptionCorrupt
  )

#glimpse(summaryFactors)

# Labelled correlation matrix of every selected factor against the others.
ggcorr(
  summaryFactors,
  label = TRUE,
  label_round = 2,
  label_color = 'white',
  colors = "RdBu",
  hjust = .70,
  layout.exp = 4
)

Using the above correlation matrix we observe that income per person, social support, and healthy life expectancy at birth are highly correlated with happiness overall. These factors can be divided into economic factors as well as social.

Below we highlight important economic factors, and later on social factors.

Each factor is important for identifying what differentiates happiness between specific regions of the world.

Economic Factors

Region Mean Log(GDP) Mean PPP Mean Income per Person
East Asia & Pacific 9.58 1216.689 36.98
Europe & Central Asia 10.21 9.99 31.63
Latin America & Caribbean 9.26 139.42 46.79
Middle East & North Africa 9.84 124.50 36.23
North America 10.81 1.12 36.30
South Asia 8.75 32.20 35.20
Sub-Saharan Africa 7.98 480.58 42.94

PPP is measured by finding the values (in USD) of consumer goods that are present in each country. If that good costs $100 in the US and $200 in the United Kingdom, then the purchasing power parity exchange rate is 1:2.
GDP is a measure used to evaluate the health of a country’s economy. It is the total value of the goods and services produced in a country during a specific period of time.

# Economic factors: 2018 subset, correlations with happiness, regional means.
# NOTE(review): this section reads `full_data` while the other sections read
# `final_data` -- confirm both names refer to the intended table.
data_2018 <- filter(full_data, year == 2018)

# Pearson r between each economic factor and the happiness score, using only
# the rows where both values are present, rounded for the plot labels.
corr_gdp <- with(
  data_2018,
  round(cor(log_gdp_per_capita, life_ladder, method = "pearson", use = "complete.obs"), 2)
)
corr_ineq <- with(
  data_2018,
  round(cor(inequality, life_ladder, method = "pearson", use = "complete.obs"), 2)
)
corr_ppp <- with(
  data_2018,
  round(cor(ppp, life_ladder, method = "pearson", use = "complete.obs"), 2)
)

# Per-region means, computed over the rows that have a log GDP value.
avg_region <- data_2018 %>%
  filter(!is.na(log_gdp_per_capita)) %>%
  group_by(region) %>%
  summarize(
    average_gdp = mean(log_gdp_per_capita),
    average_ppp = mean(ppp),
    average_ineq = mean(inequality),
    average_happ = mean(life_ladder)
  )

avg_region
## # A tibble: 7 x 5
##   region                     average_gdp average_ppp average_ineq
##   <chr>                            <dbl>       <dbl>        <dbl>
## 1 East Asia & Pacific               9.58     1217.           37.0
## 2 Europe & Central Asia            10.2         9.99         31.6
## 3 Latin America & Caribbean         9.26      139.           46.8
## 4 Middle East & North Africa        9.84      125.           36.2
## 5 North America                    10.8         1.12         36.3
## 6 South Asia                        8.75       32.2          35.2
## 7 Sub-Saharan Africa                7.98      481.           42.9
##   average_happ
##          <dbl>
## 1         5.69
## 2         6.27
## 3         6.02
## 4         5.55
## 5         7.03
## 6         4.55
## 7         4.51
# Scatterplot: log GDP per capita vs. happiness (2018), with the Pearson r
# printed in the upper-left corner.
ggplot(data_2018) +
  geom_point(aes(x = log_gdp_per_capita, y = life_ladder)) +
  annotate(
    geom = 'text', x = 7.5, y = 7.5,
    label = str_c('r= ', corr_gdp),
    hjust = 0, size = 7, color = 'red'
  ) +
  labs(
    title = "GDP vs. Happiness 2018",
    x = "Log(GDP)",
    y = "Happiness",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal_grid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Order the regions by average log GDP so the bars are sorted.
avg_region_gdp <- mutate(avg_region, region = fct_reorder(region, average_gdp))

# Horizontal bars: average log GDP per region, filled by average happiness.
ggplot(avg_region_gdp, aes(x = region, y = average_gdp, fill = average_happ)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  scale_fill_viridis(option = "plasma") +
  labs(
    x = 'Region',
    y = 'Average Log GDP',
    title = 'Happiness by region and GDP',
    fill = 'happiness',
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  ) +
  theme_minimal_vgrid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))

The charts above take a deeper look into the gross domestic product of a country and its happiness. GDP has a high positive correlation, 0.78, with happiness. The bar chart shows that regions with a high GDP also have a higher happiness score. Although GDP isn’t an indicator of how much money one has, it is interesting to note that there is such a high correlation between a country’s economic health and its level of happiness.

# Scatterplot: income inequality vs. happiness (2018), with the Pearson r
# printed in the upper-left corner.
ggplot(data_2018, aes(x=inequality, y=life_ladder))+
  geom_point()+
  theme_minimal_grid()+
  # Anchor the label at the smallest observed inequality value. The previous
  # x = 7.5 was carried over from the log-GDP plot and lies far below the
  # inequality values, which stretched the x axis to make room for the label.
  annotate(geom='text', x=min(data_2018$inequality, na.rm = TRUE), y=7.5, label=str_c('r= ',corr_ineq), hjust=0, size=6, color = 'red')+
  labs(title="Income Inequality vs. Happiness 2018",
       x="Income Inequality",
       y="Happiness", caption = 'Source: https://www.gapminder.org/data/')+
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Order the regions by average income inequality so the bars are sorted.
avg_region_ineq <- mutate(avg_region, region = fct_reorder(region, average_ineq))

# Horizontal bars: average inequality per region, filled by average happiness.
ggplot(avg_region_ineq, aes(x = region, y = average_ineq, fill = average_happ)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  scale_fill_viridis(option = "plasma") +
  labs(
    x = 'Region',
    y = 'Income Inequality',
    title = 'Happiness by Region and Income Inequality',
    fill = 'happiness',
    caption = 'Source: https://www.gapminder.org/data/'
  ) +
  theme_minimal_vgrid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))

The next charts highlight income inequality in a country and the corresponding happiness level. One would assume that lower income inequality would lead to greater happiness, but there is not much of a correlation between the two. In the bar chart, we see that South Asia has the second lowest income inequality gap yet is one of the unhappiest regions. These graphs suggest that lower income inequality does not necessarily have an effect on happiness, as we previously thought.

# Scatterplot: purchasing power parity (GDP based) vs. happiness (2018), with
# the Pearson r printed near the top of the panel.
# The former scale_fill_viridis() call was dropped: no layer maps `fill`, so it
# had no effect on the plot.
ggplot(data_2018, aes(x=ppp, y=life_ladder))+
  geom_point()+
  theme_minimal_grid()+
  annotate(geom='text', x=1000, y=7.5, label=str_c('r= ',corr_ppp), hjust=0, size=7, color = 'red')+
  labs(title="Purchasing Power Parity Based on GDP vs. Happiness 2018",
       x="Purchasing Power Parity",
       y="Happiness", caption = 'Source: https://data.worldbank.org/indicator/')+
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Order the regions by average PPP so the bars are sorted.
avg_region_ppp <- mutate(avg_region, region = fct_reorder(region, average_ppp))

# Horizontal bars: average PPP per region, filled by average happiness.
ggplot(avg_region_ppp, aes(x = region, y = average_ppp, fill = average_happ)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  scale_fill_viridis(option = "plasma") +
  labs(
    x = 'Region',
    y = 'Purchasing Power Parity (PPP)',
    title = 'Happiness by Region and PPP',
    fill = 'happiness',
    caption = 'Source: https://data.worldbank.org/indicator/'
  ) +
  theme_minimal_vgrid() +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))

Next, we looked at purchasing power parity in a country. Purchasing power parity, or PPP, is a metric that compares economic productivity and standards of living between countries. It compares different currencies through a “basket of goods” approach: for example, how much one can buy for how much one makes. This statistic was particularly interesting because we expected countries with a higher PPP to have greater happiness, but there seems to be almost no correlation at all, -0.16 to be exact. The two happiest regions, North America and Europe & Central Asia, actually have the lowest average PPP, which may be because of the higher standards of living in western countries.

################
# CHART 1
# Suicide rate vs Happiness 
################

# 2016 rows that have both a suicide rate and a happiness score.
suicide_happiness <- final_data %>% 
  select(country, year, life_ladder, log_gdp_per_capita, incomePperson, suicideR)%>% 
  filter(year == 2016)%>% 
  filter(!is.na(suicideR)) %>% 
  filter(!is.na(life_ladder))

#glimpse(suicide_happiness)

# The correlation label is computed from the plotted data. The previous
# hand-typed 'R = 0.2695' did not match the value the report itself computes
# for these rows (0.2504473).
suicide_r_label <- str_c(
  'R = ',
  round(cor(suicide_happiness$suicideR, suicide_happiness$life_ladder), 2))

# Plain scatterplot. No colour scale is added because nothing is mapped to
# colour in this plot.
ggplot(suicide_happiness) + 
  geom_point(aes(x = suicideR, y = life_ladder)) + 
  theme_half_open(font_size = 11) + 
  labs( title = "Suicide Rate vs Happiness 2016", 
        y = "Happiness Score", 
        x = "Suicide Rate", 
        caption = "Source: https://apps.who.int/gho/data/node.main.MHSUICIDE")  + 
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))+
  annotate('text', x = 25, y = 7.5, hjust = 0, 
           label = suicide_r_label, 
           size = 3.5, color = 'red')

# Same scatterplot with each point coloured by the country's income per person.
ggplot(suicide_happiness) + 
  geom_point(aes(x = suicideR, y = life_ladder, color = incomePperson), alpha = 0.8, size = 2.2) + 
  theme_half_open(font_size = 11) + 
  labs( title = "Suicide Rate vs Happiness 2016", 
        y = "Happiness Score", 
        x = "Suicide Rate", 
        color = "Income Per Person", 
        caption = "Source: https://apps.who.int/gho/data/node.main.MHSUICIDE")  + 
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))+
  scale_color_viridis(discrete = FALSE)  + 
  annotate('text', x = 25, y = 7.5, hjust = 0, 
           label = suicide_r_label, 
           size = 3.5, color = 'red')

# R value 
########################################################
# Pearson correlation between suicide rate and happiness for the 2016 rows.
# sd() and cov() use the sample (n - 1) denominator and take n from the data,
# replacing the hand-rolled sums and the hard-coded row count of 122, which
# would silently give a wrong result if the number of rows changed.
mean_suic <- mean(suicide_happiness$suicideR) 
mean_hap <- mean(suicide_happiness$life_ladder)

s_suic <- sd(suicide_happiness$suicideR)
s_hap <- sd(suicide_happiness$life_ladder)
s_suic_hap <- cov(suicide_happiness$suicideR, suicide_happiness$life_ladder)

# r = covariance / (sd_x * sd_y)
r_s_h <- s_suic_hap / (s_suic * s_hap)
r_s_h 
## [1] 0.2504473
# 0.2695 

This first chart displays a scatter plot of the suicide rate vs happiness score for each country in the year 2016. As shown by the chart, there is little to no relationship between the two factors, which is supported by the calculated r value of 0.25.

However, when each point on the chart is colored by the country’s average income per person, it is clear that a majority of the countries represented have very low income.

Therefore, from this first graph we were able to conclude that happiness score and suicide rate do not have an effect on one another; however, income and happiness might. This led to the creation of our next graph, where we compare happiness and income.

################
# CHART 2
# Happiness vs Income 
################

# 2019 rows that have both an income per person and a happiness score.
# The pipeline now starts from final_data directly: it used to be wrapped in
# glimpse(), which printed the structure of the whole dataset into the report
# as a side effect without changing the result.
suicide_happiness2 <- final_data %>% 
  filter(year == 2019) %>%
  filter(!is.na(incomePperson)) %>% 
  filter(!is.na(life_ladder))
## Observations: 2,421
## Variables: 21
## $ country            <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year               <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality         <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder        <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support     <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth     <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom            <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity         <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt  <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect    <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect    <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov         <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality        <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp                <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y     <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp               <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region             <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group       <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#glimpse(suicide_happiness2)

# Mean happiness score of the countries shown in the chart
# (5.406222 when the report was written).
mean_y <- mean(suicide_happiness2$life_ladder)
#mean_y

# Correlation label computed from the plotted data instead of a typed-in value.
income_r_label <- str_c(
  'R = ',
  round(cor(suicide_happiness2$incomePperson, suicide_happiness2$life_ladder), 3))

# Scatterplot of income per person vs. happiness with a reference line at the
# mean happiness score. The line uses mean_y rather than a copied number so it
# follows the data. No colour scale is added: the point colour is a constant,
# not a mapped aesthetic.
ggplot(suicide_happiness2) + 
  geom_point(aes(x = incomePperson, y = life_ladder), color = "Steelblue", alpha = 0.88) + 
  theme_half_open(font_size = 11) + 
  labs( title = "Happiness Score vs Income 2019", 
        x = "Income per Person", 
        y = "Happiness Score", 
        caption = "Source: https://www.gapminder.org/data/") +
  theme(plot.caption = element_text(face = 'bold.italic', hjust = 1))+
  geom_hline(yintercept = mean_y, col = 'red') + 
  annotate('text', x = 30000, y = 5.25, hjust = 0, 
           label = 'Mean Happiness score for countries represented', 
           color = 'red', 
           size = 3) + 
  annotate('text', x = 90000, y = 8, hjust = 0, 
           label = income_r_label, 
           color = 'red', 
           size = 3)

In this chart we compared income per person and the overall happiness score of the countries. The R value of the two factors was calculated and found to be 0.728. This is a high correlation coefficient, and therefore the two factors might have an impact on one another.

When looking at the chart, the majority of the happiness points that fall below the mean happiness score also have an income equal to or less than 30000. However, there are no points with an income above 30000 that fall below the mean happiness score. Therefore it can be said that with a high income it is more likely that your happiness level will be higher and above the mean.

#FIND THE R VALUE 
###########################################################
# Pearson correlation between income per person (x) and happiness (y) for the
# rows in suicide_happiness2.
# X VALUE, X = INCOME PER PERSON 
# MEAN 
# The column is named incomePperson; the previous `income_per_person` does not
# exist in the data, so the mean (and everything derived from it) came out NA.
mean_x <- mean(suicide_happiness2$incomePperson)
#mean_x
# 19974.12

# sd() and cov() use the sample (n - 1) denominator and take n from the data,
# replacing the hand-rolled sums and the hard-coded row count of 144.
s_x <- sd(suicide_happiness2$incomePperson)
s_y <- sd(suicide_happiness2$life_ladder)
s_x_y <- cov(suicide_happiness2$incomePperson, suicide_happiness2$life_ladder)

# r = covariance / (sd_x * sd_y)
r_x_y <- s_x_y / (s_x * s_y)
#r_x_y
# Recorded when the report was written: r = .7277 with n = 144
#########################################################################
# Income per person bar chart: mean income per region, filled by the region's
# mean happiness score.

# All 2018 rows, kept for manual inspection via the glimpse call below.
check1 <- filter(final_data, year == 2018)

#glimpse(check1)

# Regional means over every row that has income, happiness and a region;
# regions are ordered by mean income so the bars are sorted.
bar_income <- final_data %>%
  filter(!is.na(incomePperson), !is.na(life_ladder), !is.na(region)) %>%
  group_by(region) %>%
  summarise(
    mean_income = mean(incomePperson),
    mean_happy = mean(life_ladder)
  ) %>%
  mutate(region = fct_reorder(region, mean_income))

#glimpse(bar_income)

ggplot(bar_income, aes(x = region, y = mean_income, fill = mean_happy)) +
  geom_col(width = 0.7) +
  coord_flip() +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  scale_fill_viridis(discrete = FALSE, option = 'plasma') +
  labs(
    title = "Mean Income Per Region",
    y = "Mean Income",
    x = "Region",
    fill = "Mean Happiness",
    caption = "Source: https://www.gapminder.org/data/"
  ) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    plot.caption = element_text(face = 'bold.italic', hjust = 1),
    axis.title.y = element_blank()
  )

This chart shows how each region compares to one another in terms of mean income and mean happiness level over the years 2008-2018. This chart further proves the idea that a higher income per person means a higher happiness score. The countries with the highest income per person are most happy, while the countries with the lowest income per person are the least happy.

After observing the impacts of the economy on happiness, we switch to the social impacts. Do social factors also play a role in a region's happiness? The variables we looked into range from social support to one's perception of corruption.

Social Factors

Region Mean Social Support Mean Healthy Life Expectancy Mean Freedom Score Mean Trust in Government Mean Perception of Corruption
East Asia & Pacific .87 67.81 .87 .65 .62
Europe & Central Asia .88 70.05 .79 .43 .67
Latin America & Caribbean .84 66.84 .83 .34 .79
Middle East & North Africa .85 69.87 .72 .43 .78
North America .91 70.95 .89 .46 .54
South Asia .71 62.52 .85 .68 .79
Sub-Saharan Africa .70 56.57 .74 .60 .77
  • Social support can also be looked at as having someone to count on in times of trouble, it is on a scale of 0 - 1.*
  • The corruption perception at the national level is just the average response of the overall perception at the individual level.*
# Social factors: 2018 rows with a region, a trust-in-government value and a
# perception-of-corruption value.
EighteenData <- final_data %>%
  filter(
    year == 2018,
    !is.na(region),
    !is.na(trustInGov),
    !is.na(perceptionCorrupt)
  )

#EighteenData

# Per-region means of each social factor plus mean happiness. region1..region6
# are copies of `region` ordered by one factor each, so every bar chart can
# sort its bars by the factor it displays.
summaryEighteen <- EighteenData %>%
  group_by(region) %>%
  summarise(
    meanLife = mean(lifeExpAtBirth),
    meanSocial = mean(social_support),
    meanFreedom = mean(freedom),
    meanTrust = mean(trustInGov),
    meanGenerosity = mean(generosity),
    meanPerception = mean(perceptionCorrupt),
    meanHappiness = mean(life_ladder)
  ) %>%
  mutate(
    region1 = fct_reorder(region, meanSocial),
    region2 = fct_reorder(region, meanFreedom),
    region3 = fct_reorder(region, meanTrust),
    region4 = fct_reorder(region, meanGenerosity),
    region5 = fct_reorder(region, meanPerception),
    region6 = fct_reorder(region, meanLife)
  )

#summaryEighteen
# Social support vs. happiness (2018).
# Correlation between social support and the happiness score over the rows
# where both are present, rounded for the scatterplot label.
corSocial <- round(cor(
    EighteenData$social_support, EighteenData$life_ladder, 
    use = "complete.obs"), 2)

# Regional bars ordered by mean social support, filled by mean happiness.
ggplot(summaryEighteen)+
  geom_col(aes(x = region1, y = meanSocial, fill = meanHappiness))+
  scale_fill_viridis(option= 'plasma')+
  coord_flip()+
  theme_minimal_vgrid()+
  scale_y_continuous(expand = expand_scale(mult = c(0,.25)))+
  # labs() names aesthetics, not screen positions: x is the region even though
  # coord_flip() draws it on the vertical axis. The two titles were swapped.
  labs(x = 'Region', y = 'Mean Social Support', title = 'Mean Social Support Score per Region', fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read")+
  theme(
    plot.title = element_text(hjust = .5),
  plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Country-level scatterplot with the correlation printed on the panel.
ggplot(EighteenData)+
  geom_point(aes(x = social_support, y = life_ladder))+
  annotate('text', x = .55, y = 7, label = str_c('r = ', corSocial), color = 'red')+
  theme_minimal()+
  labs(x = 'Social Support', y = 'Happiness Score', title = 'Correlation between Social Support and Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

Social support is a strong indicator of a region's happiness. We see that the happiest countries also have the most social support. This is highlighted in the correlation chart, where we see that the R value of happiness and social support is very high.

# Healthy life expectancy vs. happiness (2018).

# Regional bars ordered by mean healthy life expectancy, filled by mean
# happiness.
ggplot(summaryEighteen)+
  geom_col(aes(x = region6, y = meanLife, fill = meanHappiness))+
  scale_fill_viridis(option= 'plasma')+
  coord_flip()+
  theme_minimal_vgrid()+
  scale_y_continuous(expand = expand_scale(mult = c(0,.25)))+
  # labs() names aesthetics, not screen positions: x is the region even though
  # coord_flip() draws it on the vertical axis. The two titles were swapped.
  labs(x = 'Region', y = 'Mean Healthy Life Expectancy at Birth', title = 'Mean of Healthy Life Expectancy at Birth per Region', fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read")+
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Correlation between healthy life expectancy and the happiness score over the
# rows where both are present, rounded for the scatterplot label.
corLifeExp <- round(cor(
    EighteenData$lifeExpAtBirth, EighteenData$life_ladder, 
    use = "complete.obs"), 2)

# Country-level scatterplot with the correlation printed on the panel.
ggplot(EighteenData)+
  geom_point(aes(x = lifeExpAtBirth, y = life_ladder))+
  annotate('text', x = 55, y = 7, label = str_c('r = ', corLifeExp), color = 'red')+
  theme_minimal()+
  labs(x = 'Healthy Life Expectancy at Birth', y = 'Happiness Score', title = 'Correlation between Life Expectancy and Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

Healthy life expectancy is also a strong indicator of a region's happiness. We see that the happiest countries also offer the ability to live a long and healthy life. This is highlighted in the correlation chart, where we see that the R value of happiness and life expectancy is very high.

# Freedom to make life choices vs. happiness (2018).

# Regional bars ordered by mean freedom, filled by mean happiness.
ggplot(summaryEighteen)+
  geom_col(aes(x = region2, y = meanFreedom, fill = meanHappiness))+
  scale_fill_viridis(option= 'plasma')+
  coord_flip()+
  theme_minimal_vgrid()+
  scale_y_continuous(expand = expand_scale(mult = c(0,.25)))+
  # labs() names aesthetics, not screen positions: x is the region even though
  # coord_flip() draws it on the vertical axis. The two titles were swapped.
  labs(x = 'Region', y = 'Mean Level of Freedom', title = 'Mean Level of Freedom per Region', fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read")+
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Correlation between freedom and the happiness score over the rows where both
# are present, rounded for the scatterplot label.
corFreedom <- round(cor(
    EighteenData$freedom, EighteenData$life_ladder, 
    use = "complete.obs"), 2)

# Country-level scatterplot with the correlation printed on the panel
# (title typo "Feedom" corrected).
ggplot(EighteenData)+
  geom_point(aes(x = freedom, y = life_ladder))+
  annotate('text', x = .5, y = 7, label = str_c('r = ', corFreedom), color = 'red')+
  theme_minimal()+
  labs(x = 'Freedom', y = 'Happiness Score', title = 'Correlation between Freedom and Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

Initially one would think there is a strong correlation between freedom and happiness, on the assumption that a happy life consists of being free enough to make your own decisions. Although North America has a high level of both freedom and happiness, countries in South Asia such as India and Afghanistan, two countries with supposedly high government restrictions, report a high level of freedom as well. Freedom is a hard variable to compare with happiness, as the definition of freedom varies across regions.

# Trust in government vs. happiness (2018).

# Regional bars ordered by mean trust in government, filled by mean happiness.
ggplot(summaryEighteen)+
  geom_col(aes(x = region3, y = meanTrust, fill = meanHappiness))+
  scale_fill_viridis(option= 'plasma')+
  coord_flip()+
  theme_minimal_vgrid()+
  scale_y_continuous(expand = expand_scale(mult = c(0,.25)))+
  # labs() names aesthetics, not screen positions: x is the region even though
  # coord_flip() draws it on the vertical axis. The two titles were swapped.
  labs(x = 'Region', y = 'Mean Trust in Government', title = 'Mean Trust in Government per Region', fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read")+
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Correlation between trust in government and the happiness score over the
# rows where both are present, rounded for the scatterplot label.
corTrust <- round(cor(
    EighteenData$trustInGov, EighteenData$life_ladder, 
    use = "complete.obs"), 2)

# Country-level scatterplot with the correlation printed on the panel.
ggplot(EighteenData)+
  geom_point(aes(x = trustInGov, y = life_ladder))+
  annotate('text', x = .2, y = 7.5, label = str_c('r = ', corTrust), color = 'red')+
  theme_minimal()+
  labs(x = 'Trust in the Government', y = 'Happiness Score', title = 'Correlation between Trust in the Government and Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

We can see here that trust in government does not predict one’s level of happiness to any extent. This could relate to the results gathered from the freedom correlation charts, as citizens of a country usually believe their government has their best interests in mind.

# Perception of corruption vs. happiness (2018).

# Regional bars ordered by mean perception of corruption, filled by mean
# happiness.
ggplot(summaryEighteen)+
  geom_col(aes(x = region5, y = meanPerception, fill = meanHappiness))+
  scale_fill_viridis(option= 'plasma')+
  coord_flip()+
  theme_minimal_vgrid()+
  scale_y_continuous(expand = expand_scale(mult = c(0,.25)))+
  # labs() names aesthetics, not screen positions: x is the region even though
  # coord_flip() draws it on the vertical axis. The two titles were swapped.
  labs(x = 'Region', y = 'Mean Perception of Corruption', title = 'Mean Perception of Corruption per Region', fill = 'Mean Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read")+
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

# Correlation between perception of corruption and the happiness score over
# the rows where both are present, rounded for the scatterplot label.
corPerception <- round(cor(
    EighteenData$perceptionCorrupt, EighteenData$life_ladder, 
    use = "complete.obs"), 2)

# Country-level scatterplot with the correlation printed on the panel
# (title typo "Corrpution" corrected).
ggplot(EighteenData)+
  geom_point(aes(x = perceptionCorrupt, y = life_ladder))+
  annotate('text', x = .25, y = 6, label = str_c('r = ', corPerception), color = 'red')+
  theme_minimal()+
  labs(x = 'Perception of Corruption', y = 'Happiness Score', title = 'Correlation between Perception of Corruption and Happiness',
       caption = "Source: https://worldhappiness.report/ed/2019/#read") +
  theme(
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(face = 'bold.italic', hjust = 1))

Lastly, perception of corruption has a moderate negative correlation with happiness: regions with a high perception of corruption display low levels of happiness. While this seems to make sense, it can be argued, as we argued for freedom, that responses may be biased by influence from the country's government and by a level of fear.

Conclusion

################
# DATA
# TOP 10 COUNTRIES  
################ 
# Print the structure of the full dataset (each column with its type and
# first values) before ranking the countries.
glimpse(final_data) 
## Observations: 2,421
## Variables: 21
## $ country            <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year               <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality         <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder        <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support     <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth     <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom            <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity         <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt  <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect    <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect    <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov         <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality        <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp                <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y     <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp               <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region             <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group       <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Ten happiest countries in the 2019 rows, ranked by life ladder score.
# When the report was written these were: Finland, Denmark, Norway, Iceland,
# Netherlands, Switzerland, Sweden, New Zealand, Canada, Austria.
# NOTE(review): in the printed 2019 rows log_gdp_per_capita is about 1.3-1.5
# and social_support is above 1, unlike the earlier years shown by
# glimpse(final_data) -- the 2019 rows may be on a different scale; confirm
# before comparing these columns across years.
top_10 <- final_data %>%
  filter(year == 2019, !is.na(life_ladder)) %>%
  arrange(desc(life_ladder)) %>%
  slice(1:10)

glimpse(top_10)
## Observations: 10
## Variables: 21
## $ country            <chr> "Finland", "Denmark", "Norway", "Iceland", "N…
## $ year               <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 201…
## $ inequality         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ life_ladder        <dbl> 7.769, 7.600, 7.554, 7.494, 7.488, 7.480, 7.3…
## $ log_gdp_per_capita <dbl> 1.340, 1.383, 1.488, 1.380, 1.396, 1.452, 1.3…
## $ social_support     <dbl> 1.587, 1.573, 1.582, 1.624, 1.522, 1.526, 1.4…
## $ lifeExpAtBirth     <dbl> 0.986, 0.996, 1.028, 1.026, 0.999, 1.052, 1.0…
## $ freedom            <dbl> 0.596, 0.592, 0.603, 0.591, 0.557, 0.572, 0.5…
## $ generosity         <dbl> 0.153, 0.252, 0.271, 0.354, 0.322, 0.263, 0.2…
## $ perceptionCorrupt  <dbl> 0.393, 0.410, 0.341, 0.118, 0.298, 0.343, 0.3…
## $ positive_affect    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ negative_affect    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ trustInGov         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ demoQuality        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ ppp                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ country_code.y     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ pppp               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ region             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ income_group       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ incomePperson      <dbl> 42400, 48300, 66300, 47900, 50500, 59000, 472…
## $ suicideR           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
# Print the full top-10 table (all columns).
top_10
## # A tibble: 10 x 21
##    country      year inequality life_ladder log_gdp_per_capita
##    <chr>       <dbl>      <dbl>       <dbl>              <dbl>
##  1 Finland      2019         NA        7.77               1.34
##  2 Denmark      2019         NA        7.6                1.38
##  3 Norway       2019         NA        7.55               1.49
##  4 Iceland      2019         NA        7.49               1.38
##  5 Netherlands  2019         NA        7.49               1.40
##  6 Switzerland  2019         NA        7.48               1.45
##  7 Sweden       2019         NA        7.34               1.39
##  8 New Zealand  2019         NA        7.31               1.30
##  9 Canada       2019         NA        7.28               1.36
## 10 Austria      2019         NA        7.25               1.38
##    social_support lifeExpAtBirth freedom generosity perceptionCorrupt
##             <dbl>          <dbl>   <dbl>      <dbl>             <dbl>
##  1           1.59          0.986   0.596      0.153             0.393
##  2           1.57          0.996   0.592      0.252             0.41 
##  3           1.58          1.03    0.603      0.271             0.341
##  4           1.62          1.03    0.591      0.354             0.118
##  5           1.52          0.999   0.557      0.322             0.298
##  6           1.53          1.05    0.572      0.263             0.343
##  7           1.49          1.01    0.574      0.267             0.373
##  8           1.56          1.03    0.585      0.33              0.38 
##  9           1.50          1.04    0.584      0.285             0.308
## 10           1.48          1.02    0.532      0.244             0.226
##    positive_affect negative_affect trustInGov demoQuality   ppp
##              <dbl>           <dbl>      <dbl>       <dbl> <dbl>
##  1              NA              NA         NA          NA    NA
##  2              NA              NA         NA          NA    NA
##  3              NA              NA         NA          NA    NA
##  4              NA              NA         NA          NA    NA
##  5              NA              NA         NA          NA    NA
##  6              NA              NA         NA          NA    NA
##  7              NA              NA         NA          NA    NA
##  8              NA              NA         NA          NA    NA
##  9              NA              NA         NA          NA    NA
## 10              NA              NA         NA          NA    NA
##    country_code.y  pppp region income_group incomePperson suicideR
##    <chr>          <dbl> <chr>  <chr>                <dbl>    <dbl>
##  1 <NA>              NA <NA>   <NA>                 42400       NA
##  2 <NA>              NA <NA>   <NA>                 48300       NA
##  3 <NA>              NA <NA>   <NA>                 66300       NA
##  4 <NA>              NA <NA>   <NA>                 47900       NA
##  5 <NA>              NA <NA>   <NA>                 50500       NA
##  6 <NA>              NA <NA>   <NA>                 59000       NA
##  7 <NA>              NA <NA>   <NA>                 47200       NA
##  8 <NA>              NA <NA>   <NA>                 36500       NA
##  9 <NA>              NA <NA>   <NA>                 44200       NA
## 10 <NA>              NA <NA>   <NA>                 46900       NA
################
# DATA
# BOTTOM 10 COUNTRIES
################
# Inspect the columns of final_data before subsetting it.
final_data %>%
  glimpse()
## Observations: 2,421
## Variables: 21
## $ country            <chr> "Armenia", "Austria", "Azerbaijan", "Banglade…
## $ year               <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200…
## $ inequality         <dbl> 32.7, 29.8, 31.9, 33.0, 28.0, 40.2, 55.1, 62.…
## $ life_ladder        <dbl> 4.289311, 7.122211, 4.727871, 4.318909, 5.657…
## $ log_gdp_per_capita <dbl> 8.717719, 10.657212, 9.279043, 7.616417, 9.42…
## $ social_support     <dbl> 0.6818768, 0.9363504, 0.8544149, 0.6720022, 0…
## $ lifeExpAtBirth     <dbl> 64.80, 70.76, 61.88, 59.02, 61.10, 50.10, 59.…
## $ freedom            <dbl> 0.5201978, 0.9413823, 0.7715282, 0.6116642, 0…
## $ generosity         <dbl> -0.216674119, 0.300667107, -0.253262460, 0.07…
## $ perceptionCorrupt  <dbl> 0.8495131, 0.4901112, 0.7741172, 0.7859162, 0…
## $ positive_affect    <dbl> 0.4941210, 0.8231047, 0.5116876, 0.5999454, 0…
## $ negative_affect    <dbl> 0.4694188, 0.1738117, 0.2756951, 0.3207928, 0…
## $ trustInGov         <dbl> 0.3443375, 0.4970378, 0.7547066, 0.6137370, 0…
## $ demoQuality        <dbl> -0.50248164, 1.22430921, -1.18666148, -0.9783…
## $ ppp                <dbl> 1.600727e+02, 8.609180e-01, 2.211839e-01, 1.7…
## $ country_code.y     <chr> "ARM", "AUT", "AZE", "BGD", "BLR", "BEN", "BO…
## $ pppp               <dbl> 149.7049751, 0.8785690, 0.2253549, 18.3772035…
## $ region             <chr> "Europe & Central Asia", "Europe & Central As…
## $ income_group       <chr> "Upper middle income", "High income", "Upper …
## $ incomePperson      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ suicideR           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Ten lowest-scoring countries for 2019: keep rows with a recorded
# life_ladder, sort ascending, and take the first ten.
# NOTE(review): the glimpse() output in this report shows 2019 rows on a
# different scale from earlier years (e.g. log_gdp_per_capita 0.306 vs 8.72)
# -- confirm the 2019 source is comparable before mixing years.
bottom_10 <- final_data %>%
  filter(year == 2019, !is.na(life_ladder)) %>%
  arrange(life_ladder) %>%
  slice(seq_len(10))

glimpse(bottom_10)
## Observations: 10
## Variables: 21
## $ country            <chr> "South Sudan", "Central African Republic", "A…
## $ year               <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 201…
## $ inequality         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ life_ladder        <dbl> 2.853, 3.083, 3.203, 3.231, 3.334, 3.380, 3.4…
## $ log_gdp_per_capita <dbl> 0.306, 0.026, 0.350, 0.476, 0.359, 0.287, 0.1…
## $ social_support     <dbl> 0.575, 0.000, 0.517, 0.885, 0.711, 1.163, 0.5…
## $ lifeExpAtBirth     <dbl> 0.295, 0.105, 0.361, 0.499, 0.614, 0.463, 0.4…
## $ freedom            <dbl> 0.010, 0.225, 0.000, 0.417, 0.555, 0.143, 0.4…
## $ generosity         <dbl> 0.202, 0.235, 0.158, 0.276, 0.217, 0.108, 0.2…
## $ perceptionCorrupt  <dbl> 0.091, 0.035, 0.025, 0.147, 0.411, 0.077, 0.0…
## $ positive_affect    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ negative_affect    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ trustInGov         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ demoQuality        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ ppp                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ country_code.y     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ pppp               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ region             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ income_group       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ incomePperson      <dbl> 1860, 794, 1760, 2980, 2110, 2340, 1180, 2900…
## $ suicideR           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
# Auto-print the bottom-ten tibble to show its rows in the report.
bottom_10
## # A tibble: 10 x 21
##    country                   year inequality life_ladder log_gdp_per_capita
##    <chr>                    <dbl>      <dbl>       <dbl>              <dbl>
##  1 South Sudan               2019         NA        2.85              0.306
##  2 Central African Republic  2019         NA        3.08              0.026
##  3 Afghanistan               2019         NA        3.20              0.35 
##  4 Tanzania                  2019         NA        3.23              0.476
##  5 Rwanda                    2019         NA        3.33              0.359
##  6 Yemen                     2019         NA        3.38              0.287
##  7 Malawi                    2019         NA        3.41              0.191
##  8 Syria                     2019         NA        3.46              0.619
##  9 Botswana                  2019         NA        3.49              1.04 
## 10 Haiti                     2019         NA        3.60              0.323
##    social_support lifeExpAtBirth freedom generosity perceptionCorrupt
##             <dbl>          <dbl>   <dbl>      <dbl>             <dbl>
##  1          0.575          0.295   0.01       0.202             0.091
##  2          0              0.105   0.225      0.235             0.035
##  3          0.517          0.361   0          0.158             0.025
##  4          0.885          0.499   0.417      0.276             0.147
##  5          0.711          0.614   0.555      0.217             0.411
##  6          1.16           0.463   0.143      0.108             0.077
##  7          0.56           0.495   0.443      0.218             0.089
##  8          0.378          0.44    0.013      0.331             0.141
##  9          1.14           0.538   0.455      0.025             0.1  
## 10          0.688          0.449   0.026      0.419             0.11 
##    positive_affect negative_affect trustInGov demoQuality   ppp
##              <dbl>           <dbl>      <dbl>       <dbl> <dbl>
##  1              NA              NA         NA          NA    NA
##  2              NA              NA         NA          NA    NA
##  3              NA              NA         NA          NA    NA
##  4              NA              NA         NA          NA    NA
##  5              NA              NA         NA          NA    NA
##  6              NA              NA         NA          NA    NA
##  7              NA              NA         NA          NA    NA
##  8              NA              NA         NA          NA    NA
##  9              NA              NA         NA          NA    NA
## 10              NA              NA         NA          NA    NA
##    country_code.y  pppp region income_group incomePperson suicideR
##    <chr>          <dbl> <chr>  <chr>                <dbl>    <dbl>
##  1 <NA>              NA <NA>   <NA>                  1860       NA
##  2 <NA>              NA <NA>   <NA>                   794       NA
##  3 <NA>              NA <NA>   <NA>                  1760       NA
##  4 <NA>              NA <NA>   <NA>                  2980       NA
##  5 <NA>              NA <NA>   <NA>                  2110       NA
##  6 <NA>              NA <NA>   <NA>                  2340       NA
##  7 <NA>              NA <NA>   <NA>                  1180       NA
##  8 <NA>              NA <NA>   <NA>                  2900       NA
##  9 <NA>              NA <NA>   <NA>                 16800       NA
## 10 <NA>              NA <NA>   <NA>                  1640       NA
# THE BOTTOM 10 COUNTRIES
# South Sudan, Central African Republic, Afghanistan, Tanzania, Rwanda, Yemen, Malawi, Syria, Botswana, Haiti
################
# GRAPH 3
# TOP / BOTTOM 10 COUNTRIES
################
# Stack the two ten-country subsets and tag every row with the subset it came
# from. The group tag is taken from the source data frame (via `.id`) instead
# of a hard-coded score threshold; the previous threshold-based labels were
# inverted (scores above 5 were tagged "bottom").
# Levels are ordered top-then-bottom so that a plot using an unnamed palette
# still gives the top group the first colour, i.e. rendered bars keep the
# colours they had before.
ten <- bind_rows(top = top_10, bottom = bottom_10, .id = "color") %>%
  mutate(
    country = fct_reorder(country, life_ladder),
    color = factor(color, levels = c("top", "bottom"))
  )
#glimpse(ten)

# Horizontal bar chart of the happiness score, one bar per country, ordered by
# score and filled by group.
ggplot(ten) +
  geom_col(aes(x = country, y = life_ladder, fill = color),
           width = 0.7, alpha = 0.8) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05))
  ) +
  coord_flip() +
  theme_minimal_vgrid(font_size = 11) +
  # Named values pin each group to its colour regardless of level order.
  scale_fill_manual(values = c(top = 'steelblue', bottom = 'lightblue')) +
  theme(legend.position = 'none',
        axis.title.y = element_blank(),
        plot.caption = element_text(face = 'bold.italic', hjust = 1)) +
  labs(
    y = "Overall Happiness Score",
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    subtitle = "2019",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  )

The ten countries with the highest happiness scores and the ten countries with the lowest happiness scores were identified and compared. The graph shows that the top ten countries’ overall happiness scores are about double those of the bottom ten countries. This chart also shows that all of the top ten countries except Canada and New Zealand are found in Europe, while a majority of the bottom ten countries are located in Africa and Asia.

################
# GRAPH 4
# TOP / BOTTOM 10 COUNTRIES INCOME COMPARISON
################
# Reuses the combined top/bottom table `ten` built for Graph 3, plotting
# income per person instead of the happiness score.

ggplot(ten, aes(x = country, y = incomePperson, fill = color)) +
  geom_col(width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  scale_fill_manual(values = c("steelblue", "lightblue")) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    plot.caption = element_text(face = "bold.italic", hjust = 1)
  ) +
  labs(
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    subtitle = "Income Per Person Comparison, 2019",
    y = "Income Per Person",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  )

As displayed above, the difference in income between the top ten and bottom ten countries is drastic. The average income for the bottom ten countries was found to be around 3,400 dollars, and the average income for the top ten countries was found to be around 50,000 dollars. The top ten countries have an income around fourteen times greater than the bottom ten.

This goes to show that high-income, developed countries will most likely have a higher happiness score than low-income, under-developed countries.

# Same top/bottom stack as Graph 3, but with countries ordered by log GDP per
# capita so the bars are sorted by the plotted variable.
# The group tag is taken from the source data frame (via `.id`); the previous
# threshold-based labels were inverted (scores above 5 were tagged "bottom").
# Levels are ordered top-then-bottom so the top group keeps the first colour.
tenGDP <- bind_rows(top = top_10, bottom = bottom_10, .id = "color") %>%
  mutate(
    country = fct_reorder(country, log_gdp_per_capita),
    color = factor(color, levels = c("top", "bottom"))
  )


ggplot(tenGDP) +
  geom_col(aes(x = country, y = log_gdp_per_capita, fill = color),
           width = 0.7, alpha = 0.8) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05))
  ) +
  coord_flip() +
  theme_minimal_vgrid(font_size = 11) +
  # Named values pin each group to its colour regardless of level order.
  scale_fill_manual(values = c(top = 'steelblue', bottom = 'lightblue')) +
  theme(legend.position = 'none',
        axis.title.y = element_blank(),
        plot.caption = element_text(face = 'bold.italic', hjust = 1))+
  labs(
    # The axis shows log_gdp_per_capita, not income per person.
    y = "Log GDP Per Capita",
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    subtitle = "log(gdp) Comparison, 2019",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  )

As displayed above, there is a major difference in economic health between the top ten and bottom ten countries. This goes to show that a healthy, smoothly running economy is strongly likely to increase the happiness of a country’s population.

# Horizontal bar chart of social support for the top/bottom countries held in
# `ten_social_health`, filled by the `color` group column.
ggplot(ten_social_health, aes(x = country, y = social_support, fill = color)) +
  geom_col(width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_fill_manual(values = c("steelblue", "lightblue")) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  theme_minimal_vgrid(font_size = 11) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    plot.caption = element_text(face = "bold.italic", hjust = 1)
  ) +
  labs(
    title = "Top 10 and Bottom 10 Overall Happiness Score",
    subtitle = "Social Support, 2019",
    y = "Social Support",
    caption = "Source: https://worldhappiness.report/ed/2019/#read"
  )

This chart compares the social support scores of each country. Again, the level of social support represents how much support from family and friends people in the country can count on.

This graph shows that all of the top ten countries have a high level of social support. There is more variety amongst the bottom ten countries, but the majority of them have less than half the social support that the top ten have. We conclude that social support heavily influences a country’s overall happiness score.

# HEALTHY LIFE EXPECTANCY
# Horizontal bar chart of lifeExpAtBirth for the top/bottom countries held in
# `ten_social_health`, filled by the `color` group column.

ggplot(ten_social_health) +
  geom_col(aes(x = country, y = lifeExpAtBirth, fill = color),width = 0.7, alpha = 0.8) +
  scale_fill_manual( values = c( 'steelblue', 'lightblue')) + 
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05))
  ) + 
  coord_flip() + 
  theme_minimal_vgrid(font_size = 11) + 
  theme(legend.position = 'none', 
        axis.title.y = element_blank(),
        plot.caption = element_text(face = 'bold.italic', hjust = 1)) + 
  labs(
    y = "Healthy Life Expectancy", 
    title = "Top 10 and Bottom 10 Overall Happiness Score", 
    # Subtitle previously read "Expextancy" on the rendered plot.
    subtitle = "Healthy Life Expectancy, 2019", 
    caption = "Source: https://worldhappiness.report/ed/2019/#read")

This chart displays the difference in healthy life expectancy between the top ten countries and the bottom ten. The top ten countries all have a high healthy life expectancy, while people in the bottom ten countries live much shorter lives. People in countries where one can expect to live a long and prosperous life are more likely to live a happier life.

Final Conclusion and the Future

In conclusion, the factors that have a strong influence on, or correlation with, a region’s or country’s happiness are concentrated in economic and social factors: namely, GDP per capita, income per person, social support, and healthy life expectancy. The visualizations reflecting freedom, trust in the government, and perception of corruption disproved the original claims that factors directly involving a country’s governance would highly impact one’s level of happiness, as the correlation coefficients were minimal to moderate.

Through this analysis process, we can agree that happiness most likely comes from a level of satisfaction and safety in one’s life.

As countries evolve, we ask whether these four factors remain correlated with a country’s increase or decrease in happiness. For example, as one’s happiness decreases, is that directly related to a decrease in their income per person or life expectancy?

This report leads to more questions about the pursuit of happiness, and what happiness truly looks like across the globe.

Appendix

World Happiness Report:

Variable Data type Description
Country character Country name
Year numerical Year data was collected
Life_ladder numerical Average score of people when asked to score their life on a scale of 0 to 10, 10 being the best and 0 being the worst possible life. People rated based on where they felt they were at the time
Log_gdp_per_capita numerical Log of the gross domestic product per capita
Healthy_life_expectancy numerical Life expectancy based on data from the World Health Organization
Social support numerical Reflects “having someone to count on in times of need” on a 0-1 scale in the Gallup world report
Freedom numerical Average response when people were asked about their freedom to choose what to do with their life, according to the Gallup world report
Generosity numerical Data from the gallup world report based on number of people who gave to charity in the past month

W.H.O Suicide Statistics (Kaggle)

Variable Data type Description
Country character Country victim lost their life in
Sex character Gender of victim (Male/female/Both)
Year double Year incident happened
Age character Age category of victim
Suicide_no double Number of deaths by intentional harm
Population double Number of all people living in the country.

G.H.O W.H.O Suicide Statistics

Variable Data type Description
Country character Country victim lost their life in
Sex character Gender of victim (Male/female/Both)
Year double Year incident happened
suicide_rate double rate of crude suicide deaths per 100,000

Purchasing Power Parities conversion rates

Variable Data type Description
country character Country name
country_code character 3 letter country abbreviation
region character Country region
year numerical Year data was collected
Ppp (purchasing power parity gdp) numerical the number of units of a country’s currency required to buy the same amounts of goods and services in the domestic market as a U.S. dollar would buy in the United States. This conversion factor is for GDP.
Pppp (purchasing power parity private) numerical the number of units of a country’s currency required to buy the same amounts of goods and services in the domestic market as a U.S. dollar would buy in the United States. This conversion factor is for private consumption (i.e., household final consumption expenditure).

Gapminder Data

Variable Data type Description
Country character Country name
Year double year
Income_per_person double gross domestic product per person adjusted for differences in purchasing power to show the overall income per person for each country
Income_inequality double How much the distribution of income deviates from perfectly equal distribution