CFA Invarianza ICILS 2023

Preparation

Setting the sesion and loading the data

# Seteo de sesión
library(pacman)

Warning: package 'pacman' was built under R version 4.4.3

pacman::p_load(
  dplyr, haven, sjlabelled,  psych,  purrr,  tidyr,  sjPlot,  ggplot2, 
  parameters,  table1,  car,  beeswarm,  lme4, scales, ggrepel, corrplot,
  ggtext, patchwork, lavaan, semTools, DT, knitr, kableExtra)

options(scipen = 999)
rm(list = ls())

# Cargamos los datos

pisa <- readRDS(here::here("input/data/proc_data/pisa22_proc.rds"))

CFA pooled model

model_cfa <- '
  gen_dse =~ search_info + evaluate_info + collect_data + 
  explain_content + share_content + pair_collab + 
  write_text +  + create_media + change_settings
  spec_dse =~ programming + develop_webpage
  '

m_cfa <- cfa(model = model_cfa, 
              data = pisa, 
              estimator = "DWLS",
              ordered = T,
              std.lv = F)

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

standardizedsolution(m_cfa) %>% 
  filter(op == "=~") %>% 
  select(Factor = lhs, Indicator = rhs, `Loading (DWLS)` = est.std)

     Factor       Indicator Loading..DWLS.
1   gen_dse     search_info          0.909
2   gen_dse   evaluate_info          0.891
3   gen_dse    collect_data          0.834
4   gen_dse explain_content          0.887
5   gen_dse   share_content          0.906
6   gen_dse     pair_collab          0.889
7   gen_dse      write_text          0.890
8   gen_dse    create_media          0.844
9   gen_dse change_settings          0.809
10 spec_dse     programming          0.674
11 spec_dse develop_webpage          1.002

summary(m_cfa)

lavaan 0.6-19 ended normally after 32 iterations

  Estimator                                       DWLS
  Optimization method                           NLMINB
  Number of model parameters                        45

                                                  Used       Total
  Number of observations                        247752      393607

Model Test User Model:
                                                       
  Test statistic                              98875.541
  Degrees of freedom                                 43
  P-value (Chi-square)                            0.000

Parameter Estimates:

  Parameterization                               Delta
  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model        Unstructured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)
  gen_dse =~                                          
    search_info       1.000                           
    evaluate_info     0.981    0.001 1347.797    0.000
    collect_data      0.918    0.001 1333.543    0.000
    explain_contnt    0.976    0.001 1473.984    0.000
    share_content     0.998    0.001 1465.273    0.000
    pair_collab       0.978    0.001 1458.512    0.000
    write_text        0.980    0.001 1441.677    0.000
    create_media      0.929    0.001 1345.320    0.000
    change_settngs    0.890    0.001 1264.131    0.000
  spec_dse =~                                         
    programming       1.000                           
    develop_webpag    1.487    0.003  551.730    0.000

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)
  gen_dse ~~                                          
    spec_dse          0.437    0.001  590.982    0.000

Thresholds:
                   Estimate  Std.Err  z-value  P(>|z|)
    search_info|t1   -1.534    0.004 -387.962    0.000
    search_info|t2   -1.170    0.003 -359.253    0.000
    search_info|t3   -0.379    0.003 -146.697    0.000
    evaluate_nf|t1   -1.617    0.004 -387.958    0.000
    evaluate_nf|t2   -1.111    0.003 -350.180    0.000
    evaluate_nf|t3   -0.065    0.003  -25.915    0.000
    collect_dat|t1   -1.439    0.004 -385.048    0.000
    collect_dat|t2   -0.866    0.003 -299.383    0.000
    collect_dat|t3    0.082    0.003   32.620    0.000
    expln_cntnt|t1   -1.550    0.004 -388.137    0.000
    expln_cntnt|t2   -1.035    0.003 -336.581    0.000
    expln_cntnt|t3   -0.092    0.003  -36.492    0.000
    share_cntnt|t1   -1.580    0.004 -388.244    0.000
    share_cntnt|t2   -1.121    0.003 -351.733    0.000
    share_cntnt|t3   -0.227    0.003  -89.494    0.000
    pair_collab|t1   -1.629    0.004 -387.753    0.000
    pair_collab|t2   -1.143    0.003 -355.314    0.000
    pair_collab|t3   -0.230    0.003  -90.600    0.000
    write_text|t1    -1.628    0.004 -387.776    0.000
    write_text|t2    -1.130    0.003 -353.275    0.000
    write_text|t3    -0.236    0.003  -92.620    0.000
    create_medi|t1   -1.480    0.004 -386.691    0.000
    create_medi|t2   -0.896    0.003 -306.522    0.000
    create_medi|t3    0.019    0.003    7.558    0.000
    chng_sttngs|t1   -1.489    0.004 -386.968    0.000
    chng_sttngs|t2   -0.944    0.003 -317.730    0.000
    chng_sttngs|t3   -0.062    0.003  -24.742    0.000
    programming|t1   -0.721    0.003 -260.298    0.000
    programming|t2   -0.101    0.003  -40.187    0.000
    programming|t3    0.668    0.003  244.277    0.000
    develp_wbpg|t1   -1.139    0.003 -354.708    0.000
    develp_wbpg|t2   -0.499    0.003 -189.306    0.000
    develp_wbpg|t3    0.375    0.003  145.191    0.000

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)
   .search_info       0.175                           
   .evaluate_info     0.206                           
   .collect_data      0.305                           
   .explain_contnt    0.214                           
   .share_content     0.179                           
   .pair_collab       0.210                           
   .write_text        0.207                           
   .create_media      0.287                           
   .change_settngs    0.346                           
   .programming       0.546                           
   .develop_webpag   -0.004                           
    gen_dse           0.825    0.001 1067.492    0.000
    spec_dse          0.454    0.001  391.728    0.000

fit_pisa <- fitMeasures(m_cfa,
                         c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))

knitr::kable(fit_pisa, digits = 3)

	x
chisq	98875.54115382
df	43.00000000
pvalue	0.00000000
cfi	0.99527934
tli	0.99396194
rmsea	0.09631813
srmr	0.05758094

CFA Multigroup-Country Invariance

Estimations

multigroup_cfa <- '
  gen_dse =~ search_info + evaluate_info + collect_data + 
  explain_content + share_content + pair_collab + 
  write_text +  + create_media + change_settings
  spec_dse =~ programming + develop_webpage
  '
  

# Modelo configural
fitgroup <- cfa(model = multigroup_cfa, 
                data = pisa, 
                group = "CNT",
                ordered = TRUE
              )

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

# demora 2 minutos


# Modelo métrico (fijando las cargas)

fitgroup_metric <- cfa(model = multigroup_cfa, 
                       data = pisa, 
                       group = "CNT",
                       ordered = TRUE,
                      # missing = "fiml"
                      group.equal = "loadings")

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

# 3 minutos

# Modelo escalar (fijando cargas y interceptos)
memory.limit(size = 5000) # Si tienes 16 GB de RAM, por ejemplo

Warning: 'memory.limit()' is no longer supported

[1] Inf

fitgroup_scalar <- cfa(model = multigroup_cfa, 
                  data = pisa, 
                  group = "CNT",
                  ordered = TRUE,
                #  missing = "fiml",
                  group.equal = c("loadings", "intercepts"))

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

Summary table

# Extracción de indicadores de ajuste

fitmeasures_config <- fitMeasures(fitgroup,
                         c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

fitmeasures_metric <- fitMeasures(fitgroup_metric,
                         c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

fitmeasures_scalar <- fitMeasures(fitgroup_scalar,
                         c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))

# Todo se demora aprox 2 minutos

# Tabla resumen (html y opción latex)

tabla_completa <- data.frame(
  Modelo = c("1. Configural", "2. Métrico", "3. Escalar"),
  chisq = c(fitmeasures_config["chisq"], fitmeasures_metric["chisq"], fitmeasures_scalar["chisq"]),
  df = c(fitmeasures_config["df"], fitmeasures_metric["df"], fitmeasures_scalar["df"]),
  CFI = c(fitmeasures_config["cfi"], fitmeasures_metric["cfi"], fitmeasures_scalar["cfi"]),
  TLI = c(fitmeasures_config["tli"], fitmeasures_metric["tli"], fitmeasures_scalar["tli"]),
  RMSEA = c(fitmeasures_config["rmsea"], fitmeasures_metric["rmsea"], fitmeasures_scalar["rmsea"]),
  SRMR = c(fitmeasures_config["srmr"], fitmeasures_metric["srmr"], fitmeasures_scalar["srmr"])
)

# Cálculo deltas

tabla_completa$delta_chisq <- c(NA, diff(tabla_completa$chisq))
tabla_completa$delta_df    <- c(NA, diff(tabla_completa$df))
tabla_completa$delta_CFI   <- c(NA, diff(tabla_completa$CFI))
tabla_completa$delta_RMSEA <- c(NA, diff(tabla_completa$RMSEA))

# ANOVA para significancia

test_anova <- anova(fitgroup, fitgroup_metric, fitgroup_scalar)

Warning: lavaan->lavTestLRT():  
   Some restricted models fit better than less restricted models; either 
   these models are not nested, or the less restricted model failed to reach 
   a global optimum.Smallest difference = -9182.17722022042.

# 3-4 minutos

tabla_completa$p_value_delta_chisq <- test_anova$`Pr(>Chisq)`

invariance_table_cnt <- knitr::kable(
  tabla_completa,
  format = "html",
  digits = 3,
  caption = "Análisis de invarianza por países",
col.names = c(
  "Modelo", 
  "&#x03C7;<sup>2</sup>",  # chi²
  "df", "CFI", "TLI", "RMSEA", "SRMR", 
  "&#x0394;&#x03C7;<sup>2</sup>",  # Δχ²
  "&#x0394;df",              # Δdf
  "&#x0394;CFI",             # ΔCFI
  "&#x0394;RMSEA",           # ΔRMSEA
  "p-valor (&#x0394;&#x03C7;<sup>2</sup>)" # p-valor (Δχ²)
),
  booktabs = TRUE,
  escape=FALSE
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

invariance_table_cnt

Análisis de invarianza por países
Modelo	χ²	df	CFI	TLI	RMSEA	SRMR	Δχ²	Δdf	ΔCFI	ΔRMSEA	p-valor (Δχ²)
1. Configural	113431.5	2236	0.995	0.994	0.102	0.062	NA	NA	NA	NA	NA
2. Métrico	145381.1	2695	0.994	0.994	0.105	0.069	31949.649	459	-0.001	0.003	0
3. Escalar	136199.0	3715	0.995	0.996	0.087	0.062	-9182.177	1020	0.000	-0.019	1

Loading by country

# Obtenemos todos los parámetros

format(Sys.time(), tz = "Chile/Continental")

[1] "2025-08-07 16:00:31"

all_params <- parameterEstimates(fitgroup, standardized = TRUE)

# Operador =~

factor_loadings_all_groups <- all_params %>%
  filter(op == "=~")

# Extraemos las labels

group_labels <- lavInspect(fitgroup, "group.label")

# Dataframe con todos los países

country_map <- data.frame(group = 1:length(group_labels), CNT = unlist(group_labels))

# Dataframe con ambas dimensiones

factor_loadings_all_groups <- factor_loadings_all_groups %>%
  left_join(country_map, by = "group")

# Dataframe gen dse

loadings_gen_dse <- factor_loadings_all_groups %>%
  filter(lhs == "gen_dse") %>%
  select(CNT, item = rhs, loading = std.all, latent_factor = lhs)

# Etiquetas tres países con peor ajuste

labels_gen_dse <- loadings_gen_dse %>%
  group_by(item) %>%
  slice_min(order_by = loading, n = 3) %>%
  ungroup()

# Dataframe spec dse

loadings_spec_dse <- factor_loadings_all_groups %>%
  filter(lhs == "spec_dse") %>%
  select(CNT, item = rhs, loading = std.all, latent_factor = lhs)

# Etiquetas tres países con menor ajuste

labels_spec_dse <- loadings_spec_dse %>%
  group_by(item) %>%
  slice_min(order_by = loading, n = 3) %>%
  ungroup()

# Gráfico cleaveland autoeficacia general

library(forcats)

Warning: package 'forcats' was built under R version 4.4.3


Attaching package: 'forcats'

The following object is masked from 'package:sjlabelled':

    as_factor

plot_gen_dse <- ggplot(loadings_gen_dse,
                       aes(x = loading,
                           y = fct_reorder(item, loading, .fun = median, .desc = FALSE))) +
  geom_point(aes(color = '#B42012'), size = 3, alpha = 0.7) + 
    geom_text(data = labels_gen_dse, 
            aes(label = CNT), 
            nudge_y = 0.3,
            size = 2.5,
            color = "black",
            check_overlap = TRUE) +
  scale_color_identity(name = "Factor", 
                       guide = "legend", 
                       labels = c("General")) +
  labs(title = "Cargas Factoriales: Autoeficacia General",
       x = "Carga Factorial Estandarizada",
       y = "Ítem") +
  theme_minimal(base_size = 10) +
  theme(axis.text.y = element_text(size = 8),
        plot.title = element_text(hjust = 0.5),
        legend.position = "top")

plot_gen_dse

# Cleaveland autoeficacia especializada
# Fijar el gráfico a x = .3
plot_spec_dse <- ggplot(loadings_spec_dse,
                        aes(x = loading,
                            y = fct_reorder(item, loading, .fun = median, .desc = FALSE))) +
  geom_point(aes(color = '#E16462'), size = 3, alpha = 0.7) +
    geom_text_repel(data = labels_spec_dse, 
                  aes(label = CNT), 
                  size = 2.5,
                  color = "black",
                  box.padding = 0.4,
                  point.padding = 0.2,
                  min.segment.length = 0,
                  max.overlaps = Inf) +
  scale_color_identity(name = "Factor", 
                       guide = "legend", 
                       labels = c("Especializada")) +
  labs(title = "Cargas Factoriales: Autoeficacia Especializada", 
       x = "Carga Factorial Estandarizada",
       y = "Ítem") +
  theme_minimal(base_size = 10) +
  theme(axis.text.y = element_text(size = 8),
        plot.title = element_text(hjust = 0.5),
        legend.position = "top")

plot_spec_dse

Fit by countries

# SEM por separado para cada país

lista_paises <- unique(unlist(pisa$CNT))

resultados_lista <- list()
summary_lista <- list()

# Iteramos por cada país con el mismo modelo
for (pais_actual in lista_paises) {
  format(Sys.time(), tz = "Chile/Continental")
  datos_pais <- pisa %>%
    filter(CNT == pais_actual)
  
  fit_pais <- tryCatch({
    
    sem(model = multigroup_cfa, 
        data = datos_pais,
        ordered = TRUE
       # missing = "fiml"
       )
  }, error = function(e) {
    
     cat("  -> ¡ERROR! No se pudo ajustar el modelo para", pais_actual, ". Razón:", conditionMessage(e), "\n")
     return(NULL)
  })
    
  if (!is.null(fit_pais)) {
     if (lavInspect(fit_pais, "converged")) {
        medidas_ajuste <- fitMeasures(fit_pais)
        resultados_lista[[pais_actual]] <- medidas_ajuste
     } else {
        cat("  -> ¡ADVERTENCIA! El modelo para", pais_actual, "no convergió. Se omitirán sus resultados.\n")
        resultados_lista[[pais_actual]] <- rep(NA)
        
     }
  }
  format(Sys.time(), tz = "Chile/Continental")
}

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative
Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

### Test con chile (o otros países)

datos_test <- pisa %>%
    filter(CNT == "CHL")

test_model <- sem(model = multigroup_cfa, 
        data = datos_test,
        ordered = TRUE)

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

#fitMeasures(test_model)

# Convertimos resultados_lista en dataframe y agregamos CNT para detectar país

fit_indices_por_pais <- do.call(rbind, resultados_lista)

fit_indices_por_pais <- as.data.frame(fit_indices_por_pais)

fit_indices_por_pais <- fit_indices_por_pais %>%
  tibble::rownames_to_column(var = "CNT")

# Seleccionamos indicadores de interés

indices_ajuste <- fit_indices_por_pais %>% 
  select(CNT, chisq, df, rmsea, rmsea.ci.lower,  rmsea.ci.upper, srmr, cfi, tli)

datatable(
  indices_ajuste,
  options = list(
    pageLength = 10,
    autoWidth = TRUE,
    searchHighlight = TRUE
  ),
  filter = 'top',
  rownames = FALSE,
  caption = "Índices de Ajuste del Modelo (Interactiva)"
)