13.6 Dynamic factor analysis

First load the plankton dataset from the MARSS package.

 library(MARSS)
 data(lakeWAplankton, package="MARSS")
 # we want lakeWAplanktonTrans, which has been transformed
 # so the 0s are replaced with NAs and the data z-scored
 dat <- lakeWAplanktonTrans
 # use only the 10 years from 1980-1989
 plankdat <- dat[dat[,"Year"]>=1980 & dat[,"Year"]<1990,]
 # create vector of phytoplankton group names
 phytoplankton <- c("Cryptomonas", "Diatoms", "Greens",
                   "Unicells", "Other.algae")
 # get only the phytoplankton
 dat.spp.1980 <- t(plankdat[,phytoplankton])
 # z-score the data since we subsetted time
 dat.spp.1980 <- dat.spp.1980-apply(dat.spp.1980,1,mean,na.rm=TRUE)
 dat.spp.1980 <- dat.spp.1980/sqrt(apply(dat.spp.1980,1,var,na.rm=TRUE))
 #check our z-score
 apply(dat.spp.1980,1,mean,na.rm=TRUE)
  Cryptomonas       Diatoms        Greens      Unicells   Other.algae 
 4.951913e-17 -1.337183e-17  3.737694e-18 -5.276451e-18  4.365269e-18 
 apply(dat.spp.1980,1,var,na.rm=TRUE)
Cryptomonas     Diatoms      Greens    Unicells Other.algae 
          1           1           1           1           1 

Plot the data.

#make into ts since easier to plot
dat.ts <- ts(t(dat.spp.1980),frequency=12, start=c(1980,1))
par(mfrow=c(3,2),mar=c(2,2,2,2))
for(i in 1:5) 
  plot(dat.ts[,i], type="b",
       main=colnames(dat.ts)[i],col="blue",pch=16)
Phytoplankton data.

Figure 13.3: Phytoplankton data.

Run a 3 trend model on these data.

mod_3 <- atsar::fit_dfa(y = dat.spp.1980, num_trends=3)

Rotate the estimated trends and look at what it produces.

rot <- atsar::rotate_trends(mod_3)
names(rot)
[1] "Z_rot"        "trends"       "Z_rot_mean"   "trends_mean" 
[5] "trends_lower" "trends_upper"

Plot the estimate of the trends.

matplot(t(rot$trends_mean),type="l",lwd=2,ylab="mean trend")
Trends.

Figure 13.4: Trends.

13.6.1 Using leave one out cross-validation to select models

We will fit multiple DFA with different numbers of trends and use leave one out (LOO) cross-validation to choose the best model.

mod_1 = atsar::fit_dfa(y = dat.spp.1980, num_trends=1)
mod_2 = atsar::fit_dfa(y = dat.spp.1980, num_trends=2)
mod_3 = atsar::fit_dfa(y = dat.spp.1980, num_trends=3)
mod_4 = atsar::fit_dfa(y = dat.spp.1980, num_trends=4)
Warning: There were 28 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: Examine the pairs() plot to diagnose sampling problems
mod_5 = atsar::fit_dfa(y = dat.spp.1980, num_trends=5)
Warning: There were 6 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded

Warning: Examine the pairs() plot to diagnose sampling problems

We will compute the Leave One Out Information Criterion (LOOIC) using the loo package. Like AIC, lower is better.

loo::loo(loo::extract_log_lik(mod_1))$looic
[1] 1633.909

Table of the LOOIC values:

looics = c(loo::loo(loo::extract_log_lik(mod_1))$looic, loo::loo(loo::extract_log_lik(mod_2))$looic, 
    loo::loo(loo::extract_log_lik(mod_3))$looic, loo::loo(loo::extract_log_lik(mod_4))$looic, 
    loo::loo(loo::extract_log_lik(mod_5))$looic)
looic.table <- data.frame(trends = 1:5, LOOIC = looics)
looic.table
  trends    LOOIC
1      1 1633.909
2      2 1598.121
3      3 1474.484
4      4 1444.378
5      5 1421.217