# 0 Load data and dependencies --------------------------------------------

library(RCurl) # for extracting data from web
library(lavaan) # for SEM
library(dplyr) # renaming variables
library(lme4) # for ICCs
library(performance) # for ICCs

data <- read.csv(text = getURL("https://raw.githubusercontent.com/flh3/pubdata/main/MLCFA/raw.csv")) %>% 
  rename(schoolID = sid)

# 0.5 Francis Huang's Function to Help Generate Covariances ---------------

mcfa.input <- function(gp, dat){
  dat1 <- dat[complete.cases(dat), ]
  g <- dat1[ ,gp] #grouping
  freq <- data.frame(table(g))
  gn <- grep(gp, names(dat1)) #which column number is the grouping var
  dat2 <- dat1[ ,-gn] #raw only
  G <- length(table(g))
  n <- nrow(dat2)
  k <- ncol(dat2)
  scaling <- (n^2 - sum(freq$Freq^2)) / (n*(G - 1))
  varn <- names(dat1[ ,-gn])
  ms <- matrix(0, n, k)
  for (i in 1:k){
    ms[,i] <- ave(dat2[ ,i], g)
  }   
  cs <- dat2 - ms #deviation matrix, centered scores
  colnames(ms) <- colnames(cs) <- varn
  b.cov <- (cov(ms) * (n - 1)) / (G - 1) #group level cov matrix
  w.cov <- (cov(cs) * (n - 1)) / (n - G) #individual level cov matrix
  pb.cov <- (b.cov - w.cov)/scaling #estimate of pure/adjusted between cov matrix
  w.cor <- cov2cor(w.cov) #individual level cor matrix
  b.cor <- cov2cor(b.cov) #group level cor matrix
  pb.cor <- cov2cor(pb.cov) #estimate of pure between cor matrix
  icc <- round(diag(pb.cov) / (diag(w.cov) + diag(pb.cov)), 3) #iccs
  return(list(b.cov = b.cov, pw.cov = w.cov, ab.cov = pb.cov, pw.cor = w.cor,
              b.cor = b.cor, ab.cor = pb.cor,
              n = n, G = G, c. = scaling, sqc = sqrt(scaling),
              icc = icc, dfw = n - G, dfb = G, 
              pw.data = data.frame(cs),
              b.data = data.frame(ms)
  ) )
}
x <- mcfa.input("schoolID", data) # name of cluster variable, name of data

# 1 Calculate Degree of Clustering ----------------------------------------

icc_x1 <- lmer(x1 ~ 1 + (1|schoolID), data = data, REML = F)
performance::icc(icc_x1)

icc_x2 <- lmer(x2 ~ 1 + (1|schoolID), data = data, REML = F)
performance::icc(icc_x2)

icc_x3 <- lmer(x3 ~ 1 + (1|schoolID), data = data, REML = F)
performance::icc(icc_x3)

icc_x4 <- lmer(x4 ~ 1 + (1|schoolID), data = data, REML = F)
performance::icc(icc_x4)

icc_x5 <- lmer(x5 ~ 1 + (1|schoolID), data = data, REML = F)
performance::icc(icc_x5)

icc_x6 <- lmer(x6 ~ 1 + (1|schoolID), data = data, REML = F)
performance::icc(icc_x6)

# 2 Regular CFA, ignoring clustering --------------------------------------

regular <- '

      engagement =~ NA*x1 + x2 + x3 + x4 + x5 + x6
      engagement ~~ 1*engagement

'
regular_fit <- cfa(model = regular, data = data)
summary(regular_fit, fit.measures = TRUE, standardized = TRUE)

# 3 Within CFA with S_pw --------------------------------------------------


within <- '

    engagement_w =~ NA*x1 + x2 + x3 + x4 + x5 + x6
    engagement_w ~~ 1*engagement_w

'
within_fit <- cfa(within, sample.cov = x$pw.cov, sample.nobs = x$n)
summary(within_fit, fit.measures = TRUE, standardized = TRUE)

# 4 Between CFA with S_b --------------------------------------------------

between <- '

    engagement_b =~ NA*x1 + x2 + x3 + x4 + x5 + x6
    engagement_b ~~ 1*engagement_b

'
between_fit <- cfa(between, sample.cov = x$b.cov, sample.nobs = x$G)
summary(between_fit, fit.measures = T, standardized = T)

# 5 Multilevel CFA --------------------------------------------------------

multilevel <- '

    level: 1
        engagement_w =~ NA*x1 + x2 + x3 + x4 + x5 + x6
        engagement_w ~~ 1*engagement_w
    
    level: 2
        engagement_b =~ NA*x1 + x2 + x3 + x4 + x5 + x6
        engagement_b ~~ 1*engagement_b
        
'
multilevel_fit <- cfa(model = multilevel, data = data, cluster = "schoolID")
summary(multilevel_fit, fit.measures = TRUE, standardized = TRUE)

# 6 Setting Start Values Example ------------------------------------------

multilevel <- '

    level: 1
        engagement_w =~ NA*x1 + start(0.642)*x2 + start(0.633)*x3 + 
                        start(0.450)*x4 + start(0.710)*x5 + start(0.800)*x6
        engagement_w ~~ 1*engagement_w
    
    level: 2
        engagement_b =~ NA*x1 + start(2.027)*x2 + start(1.564)*x3 + 
                        start(1.260)*x4 + start(1.901)*x5 + start(1.573)*x6
        engagement_b ~~ 1*engagement_b
        
'
