knitr::opts_chunk$set(
message = FALSE,
warning = TRUE,
include = TRUE,
error = TRUE,
fig.width = 8,
fig.height = 4
)
library(tidyverse)
library(haven)
library(ggrepel)
library(brms)
library(tidybayes)
library(rstan)
options(mc.cores = 4,
brms.backend = "cmdstanr",
scipen = 999,
digits = 4,
width = 120)
# windowsFonts(Times = windowsFont("Times New Roman"))
theme_set(theme_minimal(base_size = 12, base_family = "Times"))
load("../unshareable_data/preprocessed/tl.Rda")
load("data/preprocessed/de_census/census.Rda")
source("age_norm_comparisons.R")
Most continuous norming methods do not involve any weighting – relying only on the regression model to potentially estimates improve. The figures below show how our norming results would look without poststratifiying the regularised model’s prediction. Regularisation led to smoother and more precise mean estimates in each age group as compared to raw means even before poststratification. However, the regularised prediction model’s main function in RPP is facilitating poststratification’s work (i.e., the part that corrects for nonrepresentativeness). It is poststratification that causes estimates to actually differ on average from their raw counterparts. In our case, the model recognised the positive effect of education on CFT 20-R scores, and the poststratification corrected for the sample’s overrepresentation of more highly educated individuals as compared to the population. This underlies the difference between RP and RPP.
brm_beta_ints_no_educ_male <-
brm(bf(
cft ~ (1 | mig) + male + (1 | mig:male) + (1 | mig:educ) + s(age, by = educ),
sigma ~ (1 | mig) + (1 | educ) + male + s(age)
),
chains = 4,
seed = 810,
file = "../unshareable_data/brms/cft/brm_beta_ints_no_educ_male",
data = tl) %>%
add_criterion("loo")
RPP_vs_raw_vs_RP <- age_norm_comparisons(
brm_beta_ints_no_educ_male,
RP = c("census", "norming_sample"),
prediction_transform = list(
function(x) round(x*56) # for handling proportion predictons
),
labels = c( "Raw", "RPP", "RP"),
palette = c(
"#BC3C29FF",
"#0072B5FF",
"#E18727FF"
),
output_file = "data/results/RPP_vs_raw_vs_RP.rds"
)
RPP_vs_raw_vs_RP[-1]
## $overall_estimates
## # A tibble: 3 × 5
## Mean SE_of_Mean SD SE_of_SD Model
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 37.4 NA 8.19 NA Raw
## 2 35.7 0.116 8.59 0.0952 RPP_brm_beta_ints_no_educ_male
## 3 37.3 0.0736 8.21 0.0565 RP_brm_beta_ints_no_educ_male
##
## $means_plot
##
## $SDs_plot
## Warning: Removed 55 rows containing missing values or values outside the scale range (`geom_segment()`).
##
## $SEs_plot
##
## $percentile_plot
Besides the effect that poststratification has on means, it also smoothes SD estimates and considerably reduces SEs of both means and SDs.
Since the TwinLife sample is composed of families, one could argue that the dependence among estimates of family members violates the i.i.d assumption and leads to underestimation of variance. To check the robustness of our results against this violation, we ran the same RPP but with only one person chosen randomly out of each family, thus eliminating the dependency.
set.seed(14)
tl_1_per_fid <- tl %>%
group_by(fid) %>%
sample_n(1)
brm_beta_ints_no_educ_male_1_per_fid <-
brm(bf(
cft_prop ~ (1 | mig) + male + (1 | mig:male) + (1 | mig:educ) + s(age, by = educ),
phi ~ (1 | mig) + (1 | educ) + male + s(age) # Model precision parameter
),
family = Beta(), # Beta family for proportion data
chains = 4,
seed = 810,
file = "../unshareable_data/brms/cft/brm_beta_ints_no_educ_male_1_per_fid",
data = tl_1_per_fid) %>%
add_criterion("loo")
brm_beta_ints_no_educ_male_1_per_fid
## Warning: There were 32 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: beta
## Links: mu = logit; phi = log
## Formula: cft_prop ~ (1 | mig) + male + (1 | mig:male) + (1 | mig:educ) + s(age, by = educ)
## phi ~ (1 | mig) + (1 | educ) + male + s(age)
## Data: tl_1_per_fid (Number of observations: 4046)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Smoothing Spline Hyperparameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sds(sageeducISCED3b:Uppersecondaryvocational_1) 1.25 0.74 0.27 3.05 1.01 897 1864
## sds(sageeducISCED1:Primary_1) 7.57 2.85 3.52 14.43 1.00 1930 2453
## sds(sageeducISCED2:Lowersecondary_1) 3.34 1.42 1.34 6.71 1.00 1189 1814
## sds(sageeducISCED3a:Uppersecondarygeneral_1) 1.92 1.39 0.10 5.11 1.01 688 1288
## sds(sageeducISCED4:PostMsecondary_1) 1.76 1.08 0.30 4.31 1.00 870 1467
## sds(sageeducISCED5a:Tertiarye.g.college_1) 2.28 1.06 0.64 4.78 1.00 687 822
## sds(sageeducISCED5b:Tertiarye.g.coMopprogram_1) 0.77 0.45 0.22 1.90 1.00 2326 2806
## sds(sageeducISCED6:PhD_1) 2.01 1.09 0.68 4.72 1.00 1771 2427
## sds(sageeducST1:Primary_1) 3.15 3.13 0.12 11.77 1.00 2103 1978
## sds(sageeducST2:Lowersecondary_1) 1.76 1.72 0.06 5.96 1.00 1922 1775
## sds(sageeducST3:Intermediatesecondary_1) 3.39 2.14 0.94 8.92 1.00 1892 1598
## sds(sageeducST4:Uppersecondary_1) 3.06 1.48 1.20 6.95 1.00 2741 2390
## sds(sageeducST5:Comprehensiveschool_1) 3.52 2.01 1.18 8.90 1.00 2387 2338
## sds(sageeducST6:Otherschool_1) 3.25 1.85 1.10 8.31 1.00 2400 1858
## sds(sageeducST7:Nolongeratschool_1) 2.35 2.25 0.07 8.15 1.00 1489 958
## sds(phi_sage_1) 0.37 0.44 0.01 1.58 1.00 1112 1719
##
## Multilevel Hyperparameters:
## ~mig (Number of levels: 6)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.19 0.11 0.07 0.46 1.00 1399 1851
## sd(phi_Intercept) 0.13 0.09 0.02 0.34 1.00 1356 1251
##
## ~mig:educ (Number of levels: 86)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.09 0.03 0.04 0.16 1.00 638 1033
##
## ~mig:male (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.03 0.03 0.00 0.13 1.01 1211 1311
##
## ~educ (Number of levels: 15)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(phi_Intercept) 0.17 0.06 0.09 0.30 1.00 1361 2366
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.55 0.11 0.32 0.77 1.00 928 1726
## phi_Intercept 2.55 0.09 2.36 2.74 1.00 1775 1598
## maleTRUE 0.14 0.04 0.07 0.22 1.00 2054 1197
## phi_maleTRUE -0.05 0.05 -0.14 0.03 1.00 6699 2920
## sage:educISCED3b:Uppersecondaryvocational_1 -5.27 3.14 -13.01 -0.78 1.00 867 1496
## sage:educISCED1:Primary_1 -23.34 14.90 -56.24 2.82 1.00 1864 2546
## sage:educISCED2:Lowersecondary_1 -12.17 6.35 -27.26 -1.87 1.00 1265 1530
## sage:educISCED3a:Uppersecondarygeneral_1 2.06 4.38 -4.07 13.06 1.00 990 1402
## sage:educISCED4:PostMsecondary_1 1.45 3.88 -4.59 11.01 1.00 1273 1678
## sage:educISCED5a:Tertiarye.g.college_1 5.84 4.03 -1.19 14.59 1.00 771 959
## sage:educISCED5b:Tertiarye.g.coMopprogram_1 -1.54 1.82 -5.74 1.75 1.01 1572 1701
## sage:educISCED6:PhD_1 1.83 4.43 -7.32 11.08 1.00 2191 1791
## sage:educST1:Primary_1 2.74 9.95 -15.95 27.43 1.00 1676 1031
## sage:educST2:Lowersecondary_1 3.12 5.80 -8.94 14.88 1.00 2162 1705
## sage:educST3:Intermediatesecondary_1 3.91 9.19 -14.11 24.57 1.00 1696 1220
## sage:educST4:Uppersecondary_1 1.58 7.82 -14.20 17.53 1.01 1947 1851
## sage:educST5:Comprehensiveschool_1 1.53 9.99 -20.61 21.52 1.01 1786 1536
## sage:educST6:Otherschool_1 2.51 9.02 -16.26 20.63 1.00 1839 1703
## sage:educST7:Nolongeratschool_1 1.27 8.46 -15.57 19.50 1.00 1211 675
## phi_sage_1 -0.15 0.91 -2.33 1.21 1.00 1873 1761
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
main_vs_1_per_fam <- age_norm_comparisons(
brm_beta_ints_no_educ_male, brm_beta_ints_no_educ_male_1_per_fid,
prediction_transform = list(
function(x) round(x*56), # for handling proportion predictons
function(x) round(x*56) # for handling proportion predictions
),
labels = c( "Raw", "RPP, full sample", "RPP, 1/fam"),
palette = c(
"#BC3C29FF",
"#0072B5FF",
"#6F99ADFF"
),
output_file = "data/results/main_vs_1_per_fam.rds"
)
ggsave( "figures/S09_main_vs_1_per_fam.jpeg", main_vs_1_per_fam$means_plot, width = 8, height = 4)
ggsave( "figures/S09_main_vs_1_per_fam_percentile.jpeg", main_vs_1_per_fam$percentile_plot, width = 8, height = 4)
main_vs_1_per_fam[-1]
## $overall_estimates
## # A tibble: 3 × 5
## Mean SE_of_Mean SD SE_of_SD Model
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 37.4 NA 8.19 NA Raw
## 2 35.7 0.115 8.60 0.0942 RPP_brm_beta_ints_no_educ_male
## 3 35.7 0.168 8.68 0.146 RPP_brm_beta_ints_no_educ_male_1_per_fid
##
## $means_plot
##
## $SDs_plot
## Warning: Removed 55 rows containing missing values or values outside the scale range (`geom_segment()`).
##
## $SEs_plot
##
## $percentile_plot
Some participants were assigned an education category that combines two degrees with variable ISCED levels: “university of applied sciences, university of cooperative education”. While a degree from a university of applied sciences (Fachhochschule) would put one in ISCED level 5a, a degree from a university of cooperative education (Berufsakademie) is equivalent to ISCED level 5b. TwinLife assigns all people who have this ambivalent category to ISCED 5a. Here we test the robustness of our results against excluding those 357 participants.
tl_no_berufsakad_with_fachhochschule <- tl %>%
filter(!(eca0108 == "level 5a" & eca0230 == 8))
brm_beta_ints_no_educ_male_no_berufsakad_with_fachhochschule <-
brm(bf(
cft_prop ~ (1 | mig) + male + (1 | mig:male) + (1 | mig:educ) + s(age, by = educ),
phi ~ (1 | mig) + (1 | educ) + male + s(age) # Model precision parameter
),
family = Beta(), # Beta family for proportion data
chains = 4,
seed = 810,
file = "../unshareable_data/brms/cft/brm_beta_ints_no_educ_male_no_berufsakad_with_fachhochschule",
data = tl_no_berufsakad_with_fachhochschule) %>%
add_criterion("loo")
brm_beta_ints_no_educ_male_no_berufsakad_with_fachhochschule
## Warning: There were 36 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: beta
## Links: mu = logit; phi = log
## Formula: cft_prop ~ (1 | mig) + male + (1 | mig:male) + (1 | mig:educ) + s(age, by = educ)
## phi ~ (1 | mig) + (1 | educ) + male + s(age)
## Data: tl_no_berufsakad_with_fachhochschule (Number of observations: 9630)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Smoothing Spline Hyperparameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sds(sageeducISCED3b:Uppersecondaryvocational_1) 1.41 0.69 0.45 3.06 1.00 1082 1983
## sds(sageeducISCED1:Primary_1) 6.02 2.03 3.02 10.85 1.00 1815 2388
## sds(sageeducISCED2:Lowersecondary_1) 3.33 1.44 1.41 6.77 1.01 1124 1950
## sds(sageeducISCED3a:Uppersecondarygeneral_1) 2.46 1.22 0.63 5.52 1.00 1337 1583
## sds(sageeducISCED4:PostMsecondary_1) 1.18 0.65 0.33 2.75 1.00 1518 2434
## sds(sageeducISCED5a:Tertiarye.g.college_1) 2.64 0.92 1.25 4.82 1.00 1850 2300
## sds(sageeducISCED5b:Tertiarye.g.coMopprogram_1) 0.87 0.49 0.29 2.18 1.00 1939 2776
## sds(sageeducISCED6:PhD_1) 2.08 1.05 0.82 4.67 1.00 2337 2907
## sds(sageeducST1:Primary_1) 3.35 3.06 0.12 11.43 1.00 2192 2479
## sds(sageeducST2:Lowersecondary_1) 2.32 1.89 0.19 7.16 1.00 1487 1217
## sds(sageeducST3:Intermediatesecondary_1) 3.35 2.09 0.99 9.08 1.00 2444 2565
## sds(sageeducST4:Uppersecondary_1) 3.29 1.56 1.35 7.28 1.00 2857 2255
## sds(sageeducST5:Comprehensiveschool_1) 3.69 2.20 1.23 9.81 1.00 3061 2287
## sds(sageeducST6:Otherschool_1) 3.04 1.72 1.07 7.55 1.00 3052 2410
## sds(sageeducST7:Nolongeratschool_1) 3.09 2.40 0.16 9.23 1.00 2042 1774
## sds(phi_sage_1) 0.55 0.38 0.08 1.55 1.00 1475 1697
##
## Multilevel Hyperparameters:
## ~mig (Number of levels: 6)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.23 0.11 0.10 0.51 1.00 1984 2575
## sd(phi_Intercept) 0.12 0.07 0.04 0.29 1.00 1637 2108
##
## ~mig:educ (Number of levels: 89)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.08 0.02 0.04 0.13 1.00 972 2020
##
## ~mig:male (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.02 0.02 0.00 0.07 1.00 2141 2308
##
## ~educ (Number of levels: 15)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(phi_Intercept) 0.14 0.04 0.08 0.23 1.00 2047 2710
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.57 0.12 0.35 0.80 1.00 1506 1980
## phi_Intercept 2.57 0.07 2.42 2.71 1.00 2656 2771
## maleTRUE 0.11 0.02 0.07 0.15 1.00 4100 2482
## phi_maleTRUE -0.09 0.03 -0.15 -0.03 1.00 7156 2967
## sage:educISCED3b:Uppersecondaryvocational_1 -6.45 2.96 -13.10 -1.80 1.00 1098 1698
## sage:educISCED1:Primary_1 -25.18 8.32 -43.32 -10.28 1.00 2957 2669
## sage:educISCED2:Lowersecondary_1 -15.51 6.55 -30.05 -4.90 1.01 1141 1783
## sage:educISCED3a:Uppersecondarygeneral_1 3.28 4.84 -4.50 14.41 1.00 1744 2724
## sage:educISCED4:PostMsecondary_1 0.50 2.73 -4.01 6.73 1.00 2235 2274
## sage:educISCED5a:Tertiarye.g.college_1 6.41 3.63 -0.05 14.30 1.00 1582 2306
## sage:educISCED5b:Tertiarye.g.coMopprogram_1 -2.40 1.92 -6.81 0.90 1.00 1721 2259
## sage:educISCED6:PhD_1 3.12 4.40 -5.75 11.90 1.00 2971 2450
## sage:educST1:Primary_1 2.75 10.84 -20.95 27.08 1.00 2634 1853
## sage:educST2:Lowersecondary_1 4.06 7.38 -11.24 19.41 1.00 3381 2202
## sage:educST3:Intermediatesecondary_1 5.13 9.62 -12.34 27.76 1.00 2462 1749
## sage:educST4:Uppersecondary_1 3.04 8.53 -13.95 21.07 1.00 2527 1933
## sage:educST5:Comprehensiveschool_1 1.38 10.22 -21.87 21.91 1.01 2900 1787
## sage:educST6:Otherschool_1 2.47 8.44 -15.61 19.39 1.00 3223 2169
## sage:educST7:Nolongeratschool_1 2.57 9.45 -16.05 23.45 1.00 3025 2276
## phi_sage_1 -0.94 1.08 -3.37 0.74 1.00 2558 2932
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
main_vs_mod_isced <- age_norm_comparisons(
brm_beta_ints_no_educ_male, brm_beta_ints_no_educ_male_no_berufsakad_with_fachhochschule,
prediction_transform = list(
function(x) round(x*56), # for handling proportion predictons
function(x) round(x*56) # for handling proportion predictions
),
labels = c( "Raw", "RPP, full sample", "RPP, no BeAk/FHS"),
palette = c(
"#BC3C29FF",
"#0072B5FF",
"#6F99ADFF"
),
output_file = "data/results/main_vs_mod_isced.rds"
)
ggsave( "figures/S03_main_vs_mod_isced.jpeg", main_vs_mod_isced$means_plot, width = 8, height = 4)
ggsave( "figures/S03_main_vs_mod_isced_percentile.jpeg", main_vs_mod_isced$percentile_plot, width = 8, height = 4)
main_vs_mod_isced[-1]
## $overall_estimates
## # A tibble: 3 × 5
## Mean SE_of_Mean SD SE_of_SD Model
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 37.4 NA 8.19 NA Raw
## 2 35.7 0.116 8.59 0.0948 RPP_brm_beta_ints_no_educ_male
## 3 35.7 0.115 8.61 0.0907 RPP_brm_beta_ints_no_educ_male_no_berufsakad_with_fachhochschule
##
## $means_plot
##
## $SDs_plot
## Warning: Removed 55 rows containing missing values or values outside the scale range (`geom_segment()`).
##
## $SEs_plot
##
## $percentile_plot
tl_no_missing_mig <- tl %>%
filter(complete.cases(mig0520, mig2000, mig3100, mig3200))
brm_beta_ints_no_educ_male_no_missing_mig <-
brm(bf(
cft_prop ~ (1 | mig) + male + (1 | mig:male) + (1 | mig:educ) + s(age, by = educ),
phi ~ (1 | mig) + (1 | educ) + male + s(age) # Model precision parameter
),
family = Beta(), # Beta family for proportion data
chains = 4,
seed = 810,
file = "../unshareable_data/brms/cft/brm_beta_ints_no_educ_male_no_missing_mig",
data = tl_no_missing_mig) %>%
add_criterion("loo")
brm_beta_ints_no_educ_male_no_missing_mig
## Warning: There were 25 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: beta
## Links: mu = logit; phi = log
## Formula: cft_prop ~ (1 | mig) + male + (1 | mig:male) + (1 | mig:educ) + s(age, by = educ)
## phi ~ (1 | mig) + (1 | educ) + male + s(age)
## Data: tl_no_missing_mig (Number of observations: 9484)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Smoothing Spline Hyperparameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sds(sageeducISCED3b:Uppersecondaryvocational_1) 1.63 0.85 0.47 3.57 1.01 860 1830
## sds(sageeducISCED1:Primary_1) 7.12 2.45 3.59 12.96 1.00 1764 2123
## sds(sageeducISCED2:Lowersecondary_1) 4.18 1.84 1.69 8.61 1.00 896 1930
## sds(sageeducISCED3a:Uppersecondarygeneral_1) 2.04 1.30 0.19 5.23 1.00 655 1385
## sds(sageeducISCED4:PostMsecondary_1) 1.14 0.66 0.34 2.82 1.00 931 1755
## sds(sageeducISCED5a:Tertiarye.g.college_1) 2.57 0.99 1.12 5.00 1.00 993 1549
## sds(sageeducISCED5b:Tertiarye.g.coMopprogram_1) 0.86 0.46 0.29 2.08 1.00 2145 2906
## sds(sageeducISCED6:PhD_1) 2.19 1.04 0.85 4.87 1.00 2063 2890
## sds(sageeducST1:Primary_1) 2.28 2.16 0.08 8.05 1.00 2687 1992
## sds(sageeducST2:Lowersecondary_1) 2.30 2.02 0.11 7.33 1.00 1404 1118
## sds(sageeducST3:Intermediatesecondary_1) 2.68 1.77 0.75 7.31 1.00 2113 1587
## sds(sageeducST4:Uppersecondary_1) 3.18 1.48 1.32 7.02 1.00 2670 2433
## sds(sageeducST5:Comprehensiveschool_1) 3.49 2.05 1.17 9.14 1.00 2553 2863
## sds(sageeducST6:Otherschool_1) 3.13 1.93 1.04 8.29 1.00 2332 2249
## sds(sageeducST7:Nolongeratschool_1) 3.43 2.49 0.27 9.69 1.00 2078 1969
## sds(phi_sage_1) 0.49 0.37 0.05 1.39 1.00 1377 1933
##
## Multilevel Hyperparameters:
## ~mig (Number of levels: 6)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.24 0.12 0.11 0.56 1.00 1406 1763
## sd(phi_Intercept) 0.14 0.08 0.05 0.33 1.00 1157 1605
##
## ~mig:educ (Number of levels: 89)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.09 0.02 0.05 0.14 1.00 727 1801
##
## ~mig:male (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.02 0.02 0.00 0.07 1.00 1622 1580
##
## ~educ (Number of levels: 15)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(phi_Intercept) 0.12 0.04 0.07 0.20 1.00 1673 2273
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.57 0.12 0.34 0.82 1.01 939 1736
## phi_Intercept 2.57 0.08 2.41 2.72 1.00 1463 1636
## maleTRUE 0.12 0.02 0.08 0.17 1.00 2474 1805
## phi_maleTRUE -0.08 0.03 -0.14 -0.03 1.00 6185 2610
## sage:educISCED3b:Uppersecondaryvocational_1 -7.38 3.72 -15.91 -1.93 1.01 900 1603
## sage:educISCED1:Primary_1 -28.71 9.44 -49.06 -11.74 1.00 1747 2437
## sage:educISCED2:Lowersecondary_1 -19.86 8.45 -38.82 -6.12 1.00 828 1673
## sage:educISCED3a:Uppersecondarygeneral_1 2.13 4.71 -4.66 13.61 1.00 890 1144
## sage:educISCED4:PostMsecondary_1 0.43 2.84 -4.16 7.22 1.00 1122 1085
## sage:educISCED5a:Tertiarye.g.college_1 6.10 4.13 -0.94 15.87 1.00 802 1374
## sage:educISCED5b:Tertiarye.g.coMopprogram_1 -2.47 1.92 -7.02 0.81 1.00 1837 1714
## sage:educISCED6:PhD_1 3.76 4.72 -5.18 13.61 1.00 2202 2221
## sage:educST1:Primary_1 1.71 7.51 -13.92 16.53 1.00 2156 1622
## sage:educST2:Lowersecondary_1 3.59 7.30 -12.04 19.42 1.00 1879 1392
## sage:educST3:Intermediatesecondary_1 2.80 7.88 -13.21 18.32 1.00 2342 1809
## sage:educST4:Uppersecondary_1 2.13 8.08 -14.37 19.20 1.00 1949 1765
## sage:educST5:Comprehensiveschool_1 0.58 9.17 -20.39 17.94 1.00 2348 2257
## sage:educST6:Otherschool_1 3.21 9.05 -15.94 22.39 1.00 1990 1539
## sage:educST7:Nolongeratschool_1 2.85 10.11 -17.60 24.25 1.00 2441 1975
## phi_sage_1 -0.74 1.01 -3.14 0.81 1.00 2120 2506
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
main_vs_complete_mig <- age_norm_comparisons(
brm_beta_ints_no_educ_male, brm_beta_ints_no_educ_male_no_missing_mig,
prediction_transform = list(
function(x) round(x*56), # for handling proportion predictons
function(x) round(x*56) # for handling proportion predictions
),
labels = c( "Raw", "RPP, full sample", "RPP, no missing mig"),
palette = c(
"#BC3C29FF",
"#0072B5FF",
"#6F99ADFF"
),
output_file = "data/results/main_vs_complete_mig.rds"
)
ggsave( "figures/S02_main_vs_complete_mig.jpeg", main_vs_complete_mig$means_plot, width = 8, height = 4)
ggsave( "figures/S02_main_vs_complete_mig_percentile.jpeg", main_vs_complete_mig$percentile_plot, width = 8, height = 4)
main_vs_complete_mig[-1]
## $overall_estimates
## # A tibble: 3 × 5
## Mean SE_of_Mean SD SE_of_SD Model
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 37.4 NA 8.19 NA Raw
## 2 35.7 0.115 8.60 0.0942 RPP_brm_beta_ints_no_educ_male
## 3 35.7 0.115 8.56 0.0913 RPP_brm_beta_ints_no_educ_male_no_missing_mig
##
## $means_plot
##
## $SDs_plot
## Warning: Removed 55 rows containing missing values or values outside the scale range (`geom_segment()`).
##
## $SEs_plot
##
## $percentile_plot
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Rocky Linux 9.6 (Blue Onyx)
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rstan_2.32.6 StanHeaders_2.32.10 tidybayes_3.0.7 brms_2.22.0 Rcpp_1.0.12
## [6] ggrepel_0.9.6 haven_2.5.4 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
## [11] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [16] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 svUnit_1.0.6 farver_2.1.2 loo_2.8.0 fastmap_1.2.0
## [6] TH.data_1.1-2 tensorA_0.36.2.1 digest_0.6.36 estimability_1.5.1 timechange_0.3.0
## [11] lifecycle_1.0.4 processx_3.8.4 survival_3.8-3 magrittr_2.0.3 posterior_1.6.0
## [16] compiler_4.4.1 rlang_1.1.4 sass_0.4.9 tools_4.4.1 utf8_1.2.4
## [21] yaml_2.3.8 knitr_1.47 labeling_0.4.3 bridgesampling_1.1-2 pkgbuild_1.4.4
## [26] plyr_1.8.9 cmdstanr_0.8.0 multcomp_1.4-26 abind_1.4-8 withr_3.0.0
## [31] grid_4.4.1 stats4_4.4.1 fansi_1.0.6 xtable_1.8-4 colorspace_2.1-1
## [36] inline_0.3.20 emmeans_1.10.6 scales_1.3.0 MASS_7.3-64 cli_3.6.3
## [41] mvtnorm_1.3-2 rmarkdown_2.27 ragg_1.3.2 generics_0.1.3 RcppParallel_5.1.9
## [46] rstudioapi_0.16.0 reshape2_1.4.4 tzdb_0.4.0 cachem_1.1.0 splines_4.4.1
## [51] bayesplot_1.11.1 parallel_4.4.1 matrixStats_1.5.0 vctrs_0.6.5 Matrix_1.7-1
## [56] sandwich_3.1-1 jsonlite_1.8.8 hms_1.1.3 arrayhelpers_1.1-0 systemfonts_1.1.0
## [61] ggdist_3.3.2 jquerylib_0.1.4 glue_1.7.0 ps_1.7.6 codetools_0.2-20
## [66] distributional_0.5.0 stringi_1.8.4 gtable_0.3.6 QuickJSR_1.5.1 munsell_0.5.1
## [71] pillar_1.9.0 htmltools_0.5.8.1 Brobdingnag_1.2-9 R6_2.5.1 textshaping_0.4.0
## [76] evaluate_0.24.0 lattice_0.22-6 highr_0.11 backports_1.5.0 bslib_0.7.0
## [81] rstantools_2.4.0 coda_0.19-4.1 gridExtra_2.3 nlme_3.1-166 checkmate_2.3.2
## [86] mgcv_1.9-1 xfun_0.45 zoo_1.8-12 pkgconfig_2.0.3