11.6 Dynamic factor analysis

First load the plankton dataset from the MARSS package.

library(MARSS)
data(lakeWAplankton, package = "MARSS")
# we want lakeWAplanktonTrans, which has been transformed so
# the 0s are replaced with NAs and the data z-scored
dat = lakeWAplanktonTrans
# use only the 10 years from 1980-1989
plankdat = dat[dat[, "Year"] >= 1980 & dat[, "Year"] < 1990, 
    ]
# create vector of phytoplankton group names
phytoplankton = c("Cryptomonas", "Diatoms", "Greens", "Unicells", 
    "Other.algae")
# get only the phytoplankton
dat.spp.1980 = t(plankdat[, phytoplankton])
# z-score the data since we subsetted time
dat.spp.1980 = dat.spp.1980 - apply(dat.spp.1980, 1, mean, na.rm = TRUE)
dat.spp.1980 = dat.spp.1980/sqrt(apply(dat.spp.1980, 1, var, 
    na.rm = TRUE))
# check our z-score
apply(dat.spp.1980, 1, mean, na.rm = TRUE)
  Cryptomonas       Diatoms        Greens      Unicells   Other.algae 
 4.951913e-17 -1.337183e-17  3.737694e-18 -5.276451e-18  4.365269e-18 
apply(dat.spp.1980, 1, var, na.rm = TRUE)
Cryptomonas     Diatoms      Greens    Unicells Other.algae 
          1           1           1           1           1 

Plot the data.

# make into ts since easier to plot
dat.ts = ts(t(dat.spp.1980), frequency = 12, start = c(1980, 
    1))
par(mfrow = c(3, 2), mar = c(2, 2, 2, 2))
for (i in 1:5) plot(dat.ts[, i], type = "b", main = colnames(dat.ts)[i], 
    col = "blue", pch = 16)
Phytoplankton data.

Figure 11.3: Phytoplankton data.

Run a 3 trend model on these data.

mod_3 = fit_dfa(y = dat.spp.1980, num_trends = 3)

Rotate the estimated trends and look at what it produces.

rot = rotate_trends(mod_3)
names(rot)
[1] "Z_rot"        "trends"       "Z_rot_mean"   "trends_mean" 
[5] "trends_lower" "trends_upper"

Plot the estimate of the trends.

matplot(t(rot$trends_mean), type = "l", lwd = 2, ylab = "mean trend")
Trends.

Figure 11.4: Trends.

11.6.1 Using leave one out cross-validation to select models

We will fit multiple DFA with different numbers of trends and use leave one out (LOO) cross-validation to choose the best model.

mod_1 = fit_dfa(y = dat.spp.1980, num_trends = 1)
mod_2 = fit_dfa(y = dat.spp.1980, num_trends = 2)
mod_3 = fit_dfa(y = dat.spp.1980, num_trends = 3)
mod_4 = fit_dfa(y = dat.spp.1980, num_trends = 4)
mod_5 = fit_dfa(y = dat.spp.1980, num_trends = 5)

We will compute the Leave One Out Information Criterion (LOOIC) using the loo package. Like AIC, lower is better.

library(loo)
loo(extract_log_lik(mod_1))$looic
[1] 1659.081

Table of the LOOIC values:

looics = c(loo(extract_log_lik(mod_1))$looic, loo(extract_log_lik(mod_2))$looic, 
    loo(extract_log_lik(mod_3))$looic, loo(extract_log_lik(mod_4))$looic, 
    loo(extract_log_lik(mod_5))$looic)
looic.table = data.frame(trends = 1:5, LOOIC = looics)
looic.table
  trends    LOOIC
1      1 1659.081
2      2 1624.267
3      3 1473.331
4      4 1446.624
5      5 1431.074