01-cleaning

library(tidyverse)
library(janitor)
library(readxl)

Goals:

ay_names <- c(
  "country",
   "AY99_00",
   "AY00_01",
   "AY01_02",
   "AY02_03",
   "AY03_04",
   "AY04_05",
   "AY05_06",
   "AY06_07",
   "AY07_08",
   "AY08_09",
   "AY09_10",
   "AY10_11",
   "AY11_12",
   "AY12_13",
   "AY13_14",
   "AY14_15",
   "AY15_16",
   "AY16_17",
   "AY17_18",
   "AY18_19",
   "AY19_20",
   "AY20_21",
   "AY21_22")

East Africa

east_africa <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B9:Y21", 
          col_types = "text") |>
  add_column(region = "East Africa") |>
  relocate(region)

east_africa

Central Africa

central_africa <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B24:Y31",
          col_types = "text") |>
  add_column(region = "Central Africa") |>
  relocate(region)

central_africa

Southern Africa

southern_africa <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B34:Y48",
          col_types = "text") |>
  add_column(region = "Southern Africa") |>
  relocate(region)

southern_africa

West Africa

west_africa <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B51:Y68",
          col_types = "text") |>
  add_column(region = "West Africa") |>
  relocate(region)

west_africa

East Asia

east_asia <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B75:Y83",
          col_types = "text") |>
  add_column(region = "East Asia") |>
  relocate(region)

east_asia

South and Central Asia

south_and_central_asia <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B86:Y99",
          col_types = "text") |>
  add_column(region = "South and Central Asia") |>
  relocate(region)

south_and_central_asia

Southeast Asia

southeast_asia <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B102:Y112",
          col_types = "text") |>
  add_column(region = "Southeast Asia") |>
  relocate(region)

southeast_asia

Europe

europe <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B116:Y170",
          col_types = "text") |>
  add_column(region = "Europe") |>
  relocate(region)

europe

Caribbean

caribbean <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B177:Y202",
          col_types = "text") |>
  add_column(region = "Caribbean") |>
  relocate(region)

caribbean

Mexico and Central America

mexico_and_central_america <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B205:Y213",
          col_types = "text") |>
  add_column(region = "Mexico and Central America") |>
  relocate(region)

mexico_and_central_america

South America

south_america <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B216:Y230",
          col_types = "text") |>
  add_column(region = "Southern Africa") |>
  relocate(region)

south_america

Middle East

middle_east <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B236:Y250",
          col_types = "text") |>
  add_column(region = "Middle East") |>
  relocate(region)

middle_east

North Africa

north_africa <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B253:Y258",
          col_types = "text") |>
  add_column(region = "North Africa") |>
  relocate(region)

north_africa

North America (Canada)

north_america <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B262:Y262",
          col_types = "text") |>
  add_column(region = "North America") |>
  relocate(region)

north_america

Oceania

oceania <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B266:Y285",
          col_types = "text") |>
  add_column(region = "Oceania") |>
  relocate(region)

oceania

Antarctica

antarctica <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B287:Y287",
          col_types = "text") |>
  add_column(region = "Antarctica") |>
  relocate(region)

antarctica

Multi-destination

multi_destination <- read_excel("data-raw/study_abroad.xlsx",
           col_names = ay_names, 
          range = "B289:Y289",
          col_types = "text") |>
  add_column(region = "Multi-destination") |>
  relocate(region)

multi_destination

Combine Study Abroad Destinations Together

study_abroad_combined <- bind_rows(east_africa, central_africa, southern_africa, west_africa, east_asia, south_and_central_asia, southeast_asia, europe, caribbean, mexico_and_central_america, south_america, middle_east, north_africa, north_america, oceania, antarctica, multi_destination) |>
 mutate(across(AY99_00:AY21_22, parse_number))
Warning: There were 23 warnings in `mutate()`.
The first warning was:
ℹ In argument: `across(AY99_00:AY21_22, parse_number)`.
Caused by warning:
! 4 parsing failures.
row col expected actual
  9  -- a number      -
153  -- a number      -
166  -- a number      -
235  -- a number      -
ℹ Run `dplyr::last_dplyr_warnings()` to see the 22 remaining warnings.
glimpse(study_abroad_combined)
Rows: 237
Columns: 25
$ region  <chr> "East Africa", "East Africa", "East Africa", "East Africa", "E…
$ country <chr> "Burundi", "Djibouti", "Eritrea", "Ethiopia", "Kenya", "Rwanda…
$ AY99_00 <dbl> 0, 0, 0, 0, 695, 1, 0, 0, NA, 0, 253, 44, 0, 53, 0, 1, 1, 0, 0…
$ AY00_01 <dbl> 0, 0, 18, 12, 846, 0, 0, 0, NA, 0, 295, 28, 1, 95, 13, 0, 0, 0…
$ AY01_02 <dbl> 0, 0, 0, 30, 720, 1, 0, 0, NA, 0, 293, 76, 0, 51, 0, 0, 2, 0, …
$ AY02_03 <dbl> 0, 0, 12, 26, 625, 6, 0, 0, NA, 0, 347, 85, 1, 66, 0, 0, 1, 0,…
$ AY03_04 <dbl> 0, 0, 1, 26, 387, 10, 0, 1, NA, 0, 373, 141, 0, 74, 0, 0, 0, 1…
$ AY04_05 <dbl> 1, 0, 14, 33, 661, 14, 0, 1, NA, 1, 467, 159, 0, 75, 7, 0, 0, …
$ AY05_06 <dbl> 0, 0, 8, 56, 694, 44, 0, 1, NA, 43, 557, 327, 0, 66, 0, 0, 0, …
$ AY06_07 <dbl> 1, 0, 1, 84, 686, 59, 0, 1, NA, 4, 630, 357, 0, 142, 2, 4, 1, …
$ AY07_08 <dbl> 0, 0, 0, 115, 657, 61, 0, 0, NA, 2, 783, 457, 0, 108, 9, 0, 3,…
$ AY08_09 <dbl> 0, 0, 1, 133, 881, 203, 0, 1, NA, 1, 863, 611, 0, 134, 0, 0, 1…
$ AY09_10 <dbl> 0, 23, 3, 191, 1198, 126, 0, 1, NA, 1, 962, 571, 0, 70, 2, 1, …
$ AY10_11 <dbl> 2, 1, 0, 147, 1291, 196, 0, 2, NA, 5, 1126, 673, 0, 106, 8, 1,…
$ AY11_12 <dbl> 9, 4, 1, 267, 1231, 308, 0, 1, NA, 6, 1115, 716, 0, 135, 2, 0,…
$ AY12_13 <dbl> 0, 1, 0, 231, 1238, 366, 1, 0, 2, 3, 1238, 692, 0, 103, 0, 5, …
$ AY13_14 <dbl> 1, 1, 1, 235, 1022, 332, 1, 1, 7, 0, 1294, 685, 0, 175, 0, 0, …
$ AY14_15 <dbl> 2, 0, 0, 261, 634, 275, 0, 0, 3, 1, 1216, 527, 0, 92, 0, 0, 3,…
$ AY15_16 <dbl> 0, 0, 1, 302, 681, 411, 0, 0, 12, 1, 1254, 722, 0, 53, 0, 1, 2…
$ AY16_17 <dbl> 11, 0, 1, 73, 778, 380, 0, 0, 23, 0, 1364, 845, 0, 99, 0, 0, 3…
$ AY17_18 <dbl> 0, 0, 0, 207, 927, 377, 0, 10, 0, 16, 1556, 837, 0, 57, 0, 2, …
$ AY18_19 <dbl> 0, 0, 5, 249, 926, 460, 3, 0, 1, 0, 1334, 797, 0, 34, 1, 0, 13…
$ AY19_20 <dbl> 0, 8, 0, 62, 494, 139, 0, 1, 0, 0, 602, 318, 0, 20, 0, 5, 2, 0…
$ AY20_21 <dbl> 1, 0, 0, 4, 36, 36, 0, 1, 0, 0, 29, 21, 0, 2, 0, 1, 0, 1, 0, 0…
$ AY21_22 <dbl> 2, 2, 0, 9, 737, 235, 1, 0, 1, 5, 359, 227, 0, 10, 0, 1, 3, 5,…
study_abroad_combined
study_abroad_combined |> write_rds("data-processed/01-study_abroad_combined.rds")