# 0 Load data and dependencies --------------------------------------------

library(dplyr) # for data manipulation
library(RCurl) # for retrieving data from Internet
library(tidyr) # for pivoting data
library(lme4) # for multilevel modelling
library(lavaan) # for factor analysis
library(nlme) # for multilevel modelling

# Download the wide-format survey data (items x1..x6 plus a school id `sid`).
data_url <- "https://raw.githubusercontent.com/flh3/pubdata/main/MLCFA/raw.csv"
data <- read.csv(text = getURL(data_url))

# Reshape to long format: one row per (teacher, item) response, plus a 0/1
# indicator column for each of the six items.
data_t <- data %>%
  rename(schoolID = sid) %>%
  # Each row of the wide data is treated as one teacher, so the row index is
  # used as the teacher ID. row_number() works on the piped data frame instead
  # of reaching back to the global `data`, and is safe for zero rows.
  mutate(teacherID = row_number()) %>%
  pivot_longer(
    cols = starts_with("x"),
    names_to = "item",
    names_prefix = "x",
    values_to = "response"
  ) %>%
  # `item` is a character column after pivoting ("1", ..., "6"), so compare
  # against strings rather than relying on implicit numeric coercion.
  mutate(
    x1 = as.numeric(item == "1"),
    x2 = as.numeric(item == "2"),
    x3 = as.numeric(item == "3"),
    x4 = as.numeric(item == "4"),
    x5 = as.numeric(item == "5"),
    x6 = as.numeric(item == "6")
  ) %>%
  select(response, x1:x6, item, teacherID, schoolID)

# 1 MLM Single Factor -----------------------------------------------------

# Single-factor model written as a multilevel model: item responses are
# nested within teachers. The intercept stands for item 1, x2..x6 are item
# contrasts, and each teacher gets a random intercept.
mlm <- lmer(
  formula = response ~ 1 + x2 + x3 + x4 + x5 + x6 + (1 | teacherID),
  data = data_t,
  REML = TRUE
)
summary(mlm)

# Observed item means for comparison with the model estimates.
# Missing responses are dropped separately for each item.
colMeans(data[paste0("x", 1:6)], na.rm = TRUE)

# SEM comparison using unit-variance identification: NA* frees the first
# loading so all six loadings are estimated, and the factor variance is
# fixed to 1 instead.
sem <- '

      engagement =~ NA*x1 + x2 + x3 + x4 + x5 + x6
      
      engagement ~~ 1*engagement

'
# meanstructure = TRUE so the item means/intercepts are reported as well
sem_fit <- cfa(
  model = sem,
  data = data,
  meanstructure = TRUE
)
summary(sem_fit, fit.measures = TRUE, standardized = TRUE)

# Same model fitted with nlme. The fixed part has no intercept, so every item
# indicator carries its own coefficient; teachers get a random intercept; and
# varIdent() gives each item its own residual variance.
mlm_nlme <- lme(
  fixed = response ~ 0 + x1 + x2 + x3 + x4 + x5 + x6,
  random = ~ 1 | teacherID,
  weights = varIdent(form = ~ 1 | item),
  data = data_t
)
summary(mlm_nlme)

# level-1 variance components (one residual variance per item)
# varIdent stores each item's residual SD as a ratio to the reference item,
# whose ratio is 1 by construction; scale by sigma and square for variances.
item_sd_ratios <- c(
  1,
  coef(mlm_nlme$modelStruct$varStruct, unconstrained = FALSE)
)
(item_sd_ratios * mlm_nlme$sigma)^2

# level-2 error variance (factor variance)
# Taken from the fitted model rather than copied by hand from the summary
# output, so it stays correct if the data or the model change.
as.numeric(getVarCov(mlm_nlme, type = "random.effects")[1, 1])

# SEM with every loading fixed to 1 and the factor variance freely estimated.
# Note: this overwrites `sem` and `sem_fit` from the unit-variance model.
sem <- '

      engagement =~ 1*x1 + 1*x2 + 1*x3 + 1*x4 + 1*x5 + 1*x6
      
      engagement ~~ engagement

'
sem_fit <- cfa(
  model = sem,
  data = data,
  meanstructure = TRUE
)
summary(sem_fit, fit.measures = TRUE, standardized = TRUE)
