library(tidyverse)
library(janitor)
library(readxl)
01-cleaning
Goals:
<- c(
ay_names "country",
"AY99_00",
"AY00_01",
"AY01_02",
"AY02_03",
"AY03_04",
"AY04_05",
"AY05_06",
"AY06_07",
"AY07_08",
"AY08_09",
"AY09_10",
"AY10_11",
"AY11_12",
"AY12_13",
"AY13_14",
"AY14_15",
"AY15_16",
"AY16_17",
"AY17_18",
"AY18_19",
"AY19_20",
"AY20_21",
"AY21_22")
East Africa
<- read_excel("data-raw/study_abroad.xlsx",
east_africa col_names = ay_names,
range = "B9:Y21",
col_types = "text") |>
add_column(region = "East Africa") |>
relocate(region)
east_africa
Central Africa
<- read_excel("data-raw/study_abroad.xlsx",
central_africa col_names = ay_names,
range = "B24:Y31",
col_types = "text") |>
add_column(region = "Central Africa") |>
relocate(region)
central_africa
Southern Africa
<- read_excel("data-raw/study_abroad.xlsx",
southern_africa col_names = ay_names,
range = "B34:Y48",
col_types = "text") |>
add_column(region = "Southern Africa") |>
relocate(region)
southern_africa
West Africa
<- read_excel("data-raw/study_abroad.xlsx",
west_africa col_names = ay_names,
range = "B51:Y68",
col_types = "text") |>
add_column(region = "West Africa") |>
relocate(region)
west_africa
East Asia
<- read_excel("data-raw/study_abroad.xlsx",
east_asia col_names = ay_names,
range = "B75:Y83",
col_types = "text") |>
add_column(region = "East Asia") |>
relocate(region)
east_asia
South and Central Asia
<- read_excel("data-raw/study_abroad.xlsx",
south_and_central_asia col_names = ay_names,
range = "B86:Y99",
col_types = "text") |>
add_column(region = "South and Central Asia") |>
relocate(region)
south_and_central_asia
Southeast Asia
<- read_excel("data-raw/study_abroad.xlsx",
southeast_asia col_names = ay_names,
range = "B102:Y112",
col_types = "text") |>
add_column(region = "Southeast Asia") |>
relocate(region)
southeast_asia
Europe
<- read_excel("data-raw/study_abroad.xlsx",
europe col_names = ay_names,
range = "B116:Y170",
col_types = "text") |>
add_column(region = "Europe") |>
relocate(region)
europe
Caribbean
<- read_excel("data-raw/study_abroad.xlsx",
caribbean col_names = ay_names,
range = "B177:Y202",
col_types = "text") |>
add_column(region = "Caribbean") |>
relocate(region)
caribbean
Mexico and Central America
<- read_excel("data-raw/study_abroad.xlsx",
mexico_and_central_america col_names = ay_names,
range = "B205:Y213",
col_types = "text") |>
add_column(region = "Mexico and Central America") |>
relocate(region)
mexico_and_central_america
South America
<- read_excel("data-raw/study_abroad.xlsx",
south_america col_names = ay_names,
range = "B216:Y230",
col_types = "text") |>
add_column(region = "Southern Africa") |>
relocate(region)
south_america
Middle East
<- read_excel("data-raw/study_abroad.xlsx",
middle_east col_names = ay_names,
range = "B236:Y250",
col_types = "text") |>
add_column(region = "Middle East") |>
relocate(region)
middle_east
North Africa
<- read_excel("data-raw/study_abroad.xlsx",
north_africa col_names = ay_names,
range = "B253:Y258",
col_types = "text") |>
add_column(region = "North Africa") |>
relocate(region)
north_africa
North America (Canada)
<- read_excel("data-raw/study_abroad.xlsx",
north_america col_names = ay_names,
range = "B262:Y262",
col_types = "text") |>
add_column(region = "North America") |>
relocate(region)
north_america
Oceania
<- read_excel("data-raw/study_abroad.xlsx",
oceania col_names = ay_names,
range = "B266:Y285",
col_types = "text") |>
add_column(region = "Oceania") |>
relocate(region)
oceania
Antarctica
<- read_excel("data-raw/study_abroad.xlsx",
antarctica col_names = ay_names,
range = "B287:Y287",
col_types = "text") |>
add_column(region = "Antarctica") |>
relocate(region)
antarctica
Multi-destination
<- read_excel("data-raw/study_abroad.xlsx",
multi_destination col_names = ay_names,
range = "B289:Y289",
col_types = "text") |>
add_column(region = "Multi-destination") |>
relocate(region)
multi_destination
Combine Study Abroad Destinations Together
<- bind_rows(east_africa, central_africa, southern_africa, west_africa, east_asia, south_and_central_asia, southeast_asia, europe, caribbean, mexico_and_central_america, south_america, middle_east, north_africa, north_america, oceania, antarctica, multi_destination) |>
study_abroad_combined mutate(across(AY99_00:AY21_22, parse_number))
Warning: There were 23 warnings in `mutate()`.
The first warning was:
ℹ In argument: `across(AY99_00:AY21_22, parse_number)`.
Caused by warning:
! 4 parsing failures.
row col expected actual
9 -- a number -
153 -- a number -
166 -- a number -
235 -- a number -
ℹ Run `dplyr::last_dplyr_warnings()` to see the 22 remaining warnings.
glimpse(study_abroad_combined)
Rows: 237
Columns: 25
$ region <chr> "East Africa", "East Africa", "East Africa", "East Africa", "E…
$ country <chr> "Burundi", "Djibouti", "Eritrea", "Ethiopia", "Kenya", "Rwanda…
$ AY99_00 <dbl> 0, 0, 0, 0, 695, 1, 0, 0, NA, 0, 253, 44, 0, 53, 0, 1, 1, 0, 0…
$ AY00_01 <dbl> 0, 0, 18, 12, 846, 0, 0, 0, NA, 0, 295, 28, 1, 95, 13, 0, 0, 0…
$ AY01_02 <dbl> 0, 0, 0, 30, 720, 1, 0, 0, NA, 0, 293, 76, 0, 51, 0, 0, 2, 0, …
$ AY02_03 <dbl> 0, 0, 12, 26, 625, 6, 0, 0, NA, 0, 347, 85, 1, 66, 0, 0, 1, 0,…
$ AY03_04 <dbl> 0, 0, 1, 26, 387, 10, 0, 1, NA, 0, 373, 141, 0, 74, 0, 0, 0, 1…
$ AY04_05 <dbl> 1, 0, 14, 33, 661, 14, 0, 1, NA, 1, 467, 159, 0, 75, 7, 0, 0, …
$ AY05_06 <dbl> 0, 0, 8, 56, 694, 44, 0, 1, NA, 43, 557, 327, 0, 66, 0, 0, 0, …
$ AY06_07 <dbl> 1, 0, 1, 84, 686, 59, 0, 1, NA, 4, 630, 357, 0, 142, 2, 4, 1, …
$ AY07_08 <dbl> 0, 0, 0, 115, 657, 61, 0, 0, NA, 2, 783, 457, 0, 108, 9, 0, 3,…
$ AY08_09 <dbl> 0, 0, 1, 133, 881, 203, 0, 1, NA, 1, 863, 611, 0, 134, 0, 0, 1…
$ AY09_10 <dbl> 0, 23, 3, 191, 1198, 126, 0, 1, NA, 1, 962, 571, 0, 70, 2, 1, …
$ AY10_11 <dbl> 2, 1, 0, 147, 1291, 196, 0, 2, NA, 5, 1126, 673, 0, 106, 8, 1,…
$ AY11_12 <dbl> 9, 4, 1, 267, 1231, 308, 0, 1, NA, 6, 1115, 716, 0, 135, 2, 0,…
$ AY12_13 <dbl> 0, 1, 0, 231, 1238, 366, 1, 0, 2, 3, 1238, 692, 0, 103, 0, 5, …
$ AY13_14 <dbl> 1, 1, 1, 235, 1022, 332, 1, 1, 7, 0, 1294, 685, 0, 175, 0, 0, …
$ AY14_15 <dbl> 2, 0, 0, 261, 634, 275, 0, 0, 3, 1, 1216, 527, 0, 92, 0, 0, 3,…
$ AY15_16 <dbl> 0, 0, 1, 302, 681, 411, 0, 0, 12, 1, 1254, 722, 0, 53, 0, 1, 2…
$ AY16_17 <dbl> 11, 0, 1, 73, 778, 380, 0, 0, 23, 0, 1364, 845, 0, 99, 0, 0, 3…
$ AY17_18 <dbl> 0, 0, 0, 207, 927, 377, 0, 10, 0, 16, 1556, 837, 0, 57, 0, 2, …
$ AY18_19 <dbl> 0, 0, 5, 249, 926, 460, 3, 0, 1, 0, 1334, 797, 0, 34, 1, 0, 13…
$ AY19_20 <dbl> 0, 8, 0, 62, 494, 139, 0, 1, 0, 0, 602, 318, 0, 20, 0, 5, 2, 0…
$ AY20_21 <dbl> 1, 0, 0, 4, 36, 36, 0, 1, 0, 0, 29, 21, 0, 2, 0, 1, 0, 1, 0, 0…
$ AY21_22 <dbl> 2, 2, 0, 9, 737, 235, 1, 0, 1, 5, 359, 227, 0, 10, 0, 1, 3, 5,…
study_abroad_combined
|> write_rds("data-processed/01-study_abroad_combined.rds") study_abroad_combined