Main
poststratification table
census_school_type_raw <- readxl::read_excel("data/raw/729305_Zensus2011_Bildung_Schulform.xlsx",
sheet = "Migration und Schulform")
census_school_type <- census_school_type_raw %>%
# remove redundant columns of higher order categories (e.g., Deutsche mit Migrationshintergrund)
select(1:2, 21:28, 45:60, 69:76, 85:100) %>%
# exclude sex and age columns
select(3:50) %>%
# before iteratively naming the 48 columns, 8 (school types) * 6 (migration background)
set_names(map(c("Citizen: No mig background", # Personen ohne Migrationshintergrund
"Non-citizen: Own mig experience", # Ausländer/-innen mit eigener Migrationserfahrung
"Non-citizen: No own mig experience", # Ausländer/-innen ohne eigene Migrationserfahrung
"Citizen: Own mig experience", # Deutsche mit eigener Migrationserfahrung
"Citizen: Mig background from both parents", # Deutsche mit beidseitigem Migrationshintergrund
"Citizen: Mig background from one parent"), # Deutsche mit einseitigem Migrationshintergrund
~ paste0(.x, "_", c("total",
"ST7: No longer at school", # Entfällt, da kein/e Schüler/-innen
"ST1: Primary", # Grundschule
"ST2: Lower secondary", # Hauptschule
"ST3: Intermediate secondary", # Realschule
"ST4: Upper secondary", # Gymnasium
"ST5: Comprehensive school", # Gesamtschule
"ST6: Other school"))) %>% # Sonstige Schule
unlist()) %>%
# recover sex and age columns
mutate(age = as.numeric(parse_number(census_school_type_raw[[2]])),
male = census_school_type_raw[[1]],
) %>%
relocate(c(age,male)) %>%
mutate(male = ifelse(row_number() >= 115 & row_number() <= 215, TRUE, FALSE)) %>%
slice(-(1:113), -215) %>%
# disentangle mig and educ variables from one another
pivot_longer(cols = 3:50,
names_to = c("mig", "school_type"),
names_sep = "_",
values_to = "census_n") %>%
# filter(census_n != "/") %>%
# set censored cells to 0
mutate(census_n = ifelse(census_n == "/", 0, as.numeric(census_n)))
census_ISCED_raw <- readxl::read_excel("data/raw/729305_Zensus2011_Bildung_ISCED.xlsx", sheet = "Migration und ISCED")
census_ISCED <- census_ISCED_raw %>%
set_names(paste0("var", 1:133)) %>%
select(-(
census_ISCED_raw %>%
summarise(across(everything(), ~ any(str_detect(., "ISCED-Ebene")) & any(str_detect(., "Insgesamt")))) %>%
unlist() %>%
which()
)) %>%
select(1:2, var24:var34, var57:var78, var90:var100, var112:var133) %>%
select(3:56) %>%
set_names(map(c("Citizen: No mig background", # Personen ohne Migrationshintergrund
"Non-citizen: Own mig experience", # Ausländer/-innen mit eigener Migrationserfahrung
"Non-citizen: No own mig experience", # Ausländer/-innen ohne eigene Migrationserfahrung
"Citizen: Own mig experience", # Deutsche mit eigener Migrationserfahrung
"Citizen: Mig background from both parents", # Deutsche mit beidseitigem Migrationshintergrun,
"Citizen: Mig background from one parent"), # Deutsche mit einseitigem Migrationshintergrund
~ paste0(.x, "_", c("total",
"ISCED 1: Primary", # ISCED-Ebene 1 = Primärbereich
"ISCED 2: Lower secondary", # ISCED-Ebene 2 = Sekundarbereich I
"ISCED 3a: Upper secondary, general", # Sekundarbereich II A, allgemein bildend
"ISCED 3b: Upper secondary, vocational", # Sekundarbereich II B, beruflich
"ISCED 4: Post-secondary", # ISCED-Ebene 4 = Postsekundäre nichttertiäre Bildung
"ISCED 5a: Tertiary, e.g., college", # ISCED-Ebene 5 = Erste Stufe der tertiären Bildung, Tertiärbereich A
"ISCED 5b: Tertiary, e.g., Berufsakademie", # ISCED-Ebene 5 = Erste Stufe der tertiären Bildung, Tertiärbereich B
"ISCED 6: PhD" # ISCED-Ebene 6 = Zweite Stufe der tertiären Bildung
))) %>%
unlist()) %>%
mutate(age = as.numeric(parse_number(census_ISCED_raw[[2]])),
male = census_ISCED_raw[[1]]) %>%
relocate(c(age,male)) %>%
mutate(male = ifelse(row_number() >= 99 & row_number() <= 186, TRUE, FALSE)) %>%
slice(-(1:99), -186) %>%
pivot_longer(cols = 3:56,
names_to = c("mig", "isced"),
names_sep = "_",
values_to = "census_n") %>%
# filter(census_n != "/") %>%
mutate(census_n = ifelse(census_n == "/", 0, as.numeric(census_n)))
# 11 because that's the minimum age for the educ and mig variables we used in the TL sample
census_school_type_11_18 <- census_school_type %>%
filter((age >= 11 & age <= 18) & school_type != "total") %>%
rename(educ = "school_type") %>%
relocate(educ, .after = "mig")
census_ISCED_19_79 <- census_ISCED %>%
filter((age >= 19 & age <= 79) & isced != "total") %>%
rename(educ = "isced")
census <- census_school_type_11_18 %>%
bind_rows(census_ISCED_19_79)
# save(census, file="data/preprocessed/census.Rda")
# print random 14 rows/cells/subgroups/combinations out of the total 6528 in the poststratification table
census %>% slice_sample(n = 14) %>% kable %>% kable_styling(full_width = FALSE)
age
|
male
|
mig
|
educ
|
census_n
|
57
|
FALSE
|
Citizen: Mig background from one parent
|
ISCED 3b: Upper secondary, vocational
|
0
|
50
|
FALSE
|
Non-citizen: No own mig experience
|
ISCED 5a: Tertiary, e.g., college
|
0
|
24
|
TRUE
|
Non-citizen: No own mig experience
|
ISCED 6: PhD
|
0
|
44
|
TRUE
|
Non-citizen: Own mig experience
|
ISCED 5a: Tertiary, e.g., college
|
6010
|
51
|
FALSE
|
Citizen: Own mig experience
|
ISCED 6: PhD
|
0
|
53
|
TRUE
|
Citizen: Own mig experience
|
ISCED 1: Primary
|
3070
|
75
|
TRUE
|
Citizen: No mig background
|
ISCED 5a: Tertiary, e.g., college
|
42090
|
36
|
TRUE
|
Citizen: No mig background
|
ISCED 1: Primary
|
4720
|
71
|
FALSE
|
Non-citizen: Own mig experience
|
ISCED 3a: Upper secondary, general
|
440
|
77
|
FALSE
|
Non-citizen: No own mig experience
|
ISCED 1: Primary
|
0
|
73
|
TRUE
|
Citizen: Own mig experience
|
ISCED 1: Primary
|
3350
|
34
|
TRUE
|
Non-citizen: Own mig experience
|
ISCED 3a: Upper secondary, general
|
3300
|
54
|
FALSE
|
Citizen: Mig background from both parents
|
ISCED 3a: Upper secondary, general
|
0
|
27
|
FALSE
|
Citizen: Mig background from one parent
|
ISCED 1: Primary
|
0
|
Totals for the
disparities plot
age_marginals <- census_school_type_raw %>%
# total column, rows for ages 11 through 79
select(3) %>%
slice(23:91) %>%
mutate(n = as.numeric(`...3`),
category = as.character(11:79),
percentage = n/sum(n)*100,
source = "census",
variable = "age") %>%
select(-1)
sex_marginals <- census_school_type_raw %>%
# total column, rows for ages 11 through 79
select(3) %>%
slice((125:193), (227:295)) %>%
mutate(category = as.character(ifelse(row_number() < 70, T, F)),
n = as.numeric(`...3`)) %>%
group_by(category) %>%
summarize(n = sum(n)) %>%
mutate(source = "census",
variable = "male",
percentage = n/sum(n)*100)
school_type_marginals <- census_school_type_raw %>%
select(14:20) %>%
slice(23:30) %>%
set_names(paste0(c("ST7: No longer at school", # Entfällt, da kein/e Schüler/-innen
"ST1: Primary", # Grundschule
"ST2: Lower secondary", # Hauptschule
"ST3: Intermediate secondary", # Realschule
"ST4: Upper secondary", # Gymnasium
"ST5: Comprehensive school", # Gesamtschule
"ST6: Other school"))) %>% # Sonstige Schule
mutate_all(as.numeric) %>%
summarise(across(everything(), ~sum(., na.rm = TRUE))) %>%
pivot_longer(cols = everything(), names_to = "category", values_to = "n")
educ_marginals <- census_ISCED_raw %>%
select(14, 15, 17:19, 21, 22, 23) %>%
slice(17:77) %>%
set_names(paste0(c("ISCED 1: Primary",
"ISCED 2: Lower secondary",
"ISCED 3a: Upper secondary, general",
"ISCED 3b: Upper secondary, vocational",
"ISCED 4: Post-secondary",
"ISCED 5a: Tertiary, e.g., college",
"ISCED 5b: Tertiary, e.g., Berufsakademie",
"ISCED 6: PhD"))) %>%
mutate_all(as.numeric) %>%
summarise(across(everything(), ~sum(., na.rm = TRUE))) %>%
pivot_longer(cols = everything(), names_to = "category", values_to = "n") %>%
full_join(school_type_marginals, by = "category") %>%
mutate(n = ifelse((is.na(n.x) | is.na(n.y)), coalesce(n.x, n.y), n.x + n.y),
source = "census",
variable = "educ",
percentage = n/sum(n)*100) %>%
select(-c(n.x,n.y))
mig_marginals <- census_school_type_raw %>%
select(4, 7, 8, 10, 12, 13) %>%
slice(23:91) %>%
set_names(paste0(c("Citizen: No mig background",
"Non-citizen: Own mig experience",
"Non-citizen: No own mig experience",
"Citizen: Own mig experience",
"Citizen: Mig background from both parents",
"Citizen: Mig background from one parent"))) %>%
mutate_all(as.numeric) %>%
summarise(across(everything(), ~sum(., na.rm = TRUE))) %>%
pivot_longer(cols = everything(), names_to = "category", values_to = "n") %>%
mutate(source = "census",
variable = "mig",
percentage = n/sum(n)*100)
census_marginals <- bind_rows(age_marginals, educ_marginals, sex_marginals, mig_marginals)
save(census_marginals, file="data/preprocessed/census_marginals.Rda")