Today we learn:
Load packages for today:
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(ggplot2)
Load data:
download.file("https://timotheenivalis.github.io/data/thorndata.csv",
destfile = "data/thorndata.csv")
download.file("https://timotheenivalis.github.io/data/droughtdata.csv",
destfile = "data/droughtdata.csv")
download.file("https://timotheenivalis.github.io/data/roo.csv",
destfile = "data/roo.csv")
download.file("https://timotheenivalis.github.io/data/storks.csv",
destfile = "data/storks.csv")
We model the relationship EscapeDistance / Tail_Length.
roo <- read.csv("data/roo.csv")
ggplot(roo, aes(x=Tail_Length, y=EscapeDistance)) +
geom_point(aes(color=as.factor(Year)), alpha=0.5) +
geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
mm0 <- lmer(EscapeDistance ~ Tail_Length + (1|Year), data = roo)
summary(mm0)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EscapeDistance ~ Tail_Length + (1 | Year)
## Data: roo
##
## REML criterion at convergence: 15630
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6300 -0.6676 -0.0983 0.5826 6.0700
##
## Random effects:
## Groups Name Variance Std.Dev.
## Year (Intercept) 4.108 2.027
## Residual 33.561 5.793
## Number of obs: 2456, groups: Year, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -25.52373 1.18717 48.52328 -21.50 <2e-16 ***
## Tail_Length 1.14330 0.01763 2451.96379 64.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Tail_Length -0.789
# Fixed effects:
fixef(mm0)
## (Intercept) Tail_Length
## -25.523733 1.143302
# Random deviations (a.k.a. "random predictions"):
ranef(mm0)
## $Year
## (Intercept)
## 2006 0.05975949
## 2007 -1.62167497
## 2008 -2.07637086
## 2009 0.45260470
## 2010 -1.07825970
## 2011 -1.09178499
## 2012 1.27687634
## 2013 4.07885000
##
## with conditional variances for "Year"
# Fixed effects plus random deviations:
coef(mm0)
## $Year
## (Intercept) Tail_Length
## 2006 -25.46397 1.143302
## 2007 -27.14541 1.143302
## 2008 -27.60010 1.143302
## 2009 -25.07113 1.143302
## 2010 -26.60199 1.143302
## 2011 -26.61552 1.143302
## 2012 -24.24686 1.143302
## 2013 -21.44488 1.143302
##
## attr(,"class")
## [1] "coef.mer"
#Random effect variance components
as.numeric(VarCorr(mm0))
## [1] 4.108495
Visualize the year slope predictions:
predlm <- as.data.frame(coef( mm0 )$Year)
predlm$Year <- rownames(predlm)
mainslope <- as.data.frame(t(fixef(mm0)))
ggplot(roo, aes(x=Tail_Length, y=EscapeDistance)) +
geom_point(aes(color=as.factor(Year)), alpha=0.5) +
geom_smooth(method = "lm")+
geom_abline(inherit.aes = FALSE,
data = predlm,
aes(intercept=`(Intercept)`,
slope=Tail_Length,
color=as.factor(Year))) +
geom_abline(inherit.aes = FALSE,
data = mainslope,
aes(intercept=`(Intercept)`,
slope=Tail_Length), size=2)
## Warning: Ignoring unknown parameters: inherit.aes
## Warning: Ignoring unknown parameters: inherit.aes
## `geom_smooth()` using formula 'y ~ x'
Visualize the distribution of year effects and where to expect new years:
yeardeviations <- as.data.frame(ranef(mm0)$Year)
yeardeviations$Year <- row.names(yeardeviations)
ggplot(yeardeviations, aes(x=`(Intercept)`, color=Year)) +
geom_vline(aes(xintercept=`(Intercept)`, color=Year), size=2)
as.numeric(VarCorr(mm0))
## [1] 4.108495
xgrid <- seq(-8,8, by = 0.01)
randomdens <- dnorm(x = xgrid, mean = 0, sd = sqrt(as.numeric(VarCorr(mm0))))
REdensity <- data.frame(xgrid=xgrid, randomdens=randomdens)
ggplot(REdensity, aes(x=xgrid, y=randomdens)) + geom_line()+
geom_vline(data= yeardeviations,
aes(xintercept=`(Intercept)`, color=Year), size=2, alpha=0.5)
Visualize the projected range of observed and unobserved years around linear regression:
Int_low95 <- fixef(mm0)["(Intercept)"] - 1.96 * sqrt(as.numeric(VarCorr(mm0)))
Int_upp95 <- fixef(mm0)["(Intercept)"] + 1.96 * sqrt(as.numeric(VarCorr(mm0)))
Tail_Length <- seq(20,75, by = 1)
intervalRE <- data.frame(Tail_Length=Tail_Length,
low95 = Int_low95 + fixef(mm0)["Tail_Length"]*Tail_Length,
upp95 = Int_upp95 + fixef(mm0)["Tail_Length"]*Tail_Length)
ggplot(roo, aes(x=Tail_Length, y=EscapeDistance)) +
geom_point(aes(color=as.factor(Year)), alpha=0.5) +
geom_abline(inherit.aes = FALSE,
data = predlm,
aes(intercept=`(Intercept)`,
slope=Tail_Length,
color=as.factor(Year))) +
geom_abline(inherit.aes = FALSE,
data = mainslope,
aes(intercept=`(Intercept)`,
slope=Tail_Length), size=2) +
geom_ribbon(inherit.aes = FALSE,
data=intervalRE,
aes(x=Tail_Length,ymin=low95, ymax=upp95), alpha=0.3)
## Warning: Ignoring unknown parameters: inherit.aes
## Warning: Ignoring unknown parameters: inherit.aes
# Adding residual variation for the year 2006:
Int_low95 <- coef(mm0)$Year["2013", "(Intercept)"] - 1.96 * sigma(mm0)
Int_upp95 <- coef(mm0)$Year["2013", "(Intercept)"] + 1.96 * sigma(mm0)
Tail_Length <- seq(20,75, by = 1)
intervalPrediction2013 <- data.frame(Tail_Length=Tail_Length,
low95 = Int_low95 + fixef(mm0)["Tail_Length"]*Tail_Length,
upp95 = Int_upp95 + fixef(mm0)["Tail_Length"]*Tail_Length)
ggplot(roo, aes(x=Tail_Length, y=EscapeDistance)) +
geom_point(aes(color=as.factor(Year)), alpha=0.5) +
geom_abline(inherit.aes = FALSE,
data = predlm,
aes(intercept=`(Intercept)`,
slope=Tail_Length,
color=as.factor(Year))) +
geom_abline(inherit.aes = FALSE,
data = mainslope,
aes(intercept=`(Intercept)`,
slope=Tail_Length), size=2) +
geom_ribbon(inherit.aes = FALSE,
data=intervalRE,
aes(x=Tail_Length,ymin=low95, ymax=upp95), alpha=0.3)+
geom_ribbon(inherit.aes = FALSE,
data=intervalPrediction2013,
aes(x=Tail_Length,ymin=low95, ymax=upp95, fill="2013"), alpha=0.3)
## Warning: Ignoring unknown parameters: inherit.aes
## Warning: Ignoring unknown parameters: inherit.aes
Repeat the process for id, a random effect that captures far more variation:
mmid <- lmer(EscapeDistance ~ Tail_Length + (1|id), data = roo)
summary(mmid)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EscapeDistance ~ Tail_Length + (1 | id)
## Data: roo
##
## REML criterion at convergence: 15261.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6347 -0.5656 -0.0712 0.5400 4.5110
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 18.08 4.252
## Residual 20.19 4.493
## Number of obs: 2456, groups: id, 818
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -21.37495 0.96619 2358.89369 -22.12 <2e-16 ***
## Tail_Length 1.04703 0.01853 2353.53387 56.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Tail_Length -0.981
Int_low95 <- fixef(mmid)["(Intercept)"] - 1.96 * sqrt(as.numeric(VarCorr(mmid)))
Int_upp95 <- fixef(mmid)["(Intercept)"] + 1.96 * sqrt(as.numeric(VarCorr(mmid)))
Tail_Length <- seq(20,75, by = 1)
intervalRE <- data.frame(Tail_Length=Tail_Length,
low95 = Int_low95 + fixef(mmid)["Tail_Length"]*Tail_Length,
upp95 = Int_upp95 + fixef(mmid)["Tail_Length"]*Tail_Length)
mainslope <- as.data.frame(t(fixef(mmid)))
ggplot(roo, aes(x=Tail_Length, y=EscapeDistance)) +
geom_point(aes(color=as.factor(Year)), alpha=0.5) +
geom_abline(inherit.aes = FALSE,
data = mainslope,
aes(intercept=`(Intercept)`,
slope=Tail_Length), size=2) +
geom_ribbon(inherit.aes = FALSE,
data=intervalRE,
aes(x=Tail_Length,ymin=low95, ymax=upp95), alpha=0.3)
## Warning: Ignoring unknown parameters: inherit.aes
## [1] 217
##
## 1991 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
## 1 1 4 3 3 5 5 5 7 7 8 6 8 7 10 9
## 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
## 10 10 10 9 10 9 10 10 10 10 10 10 10
Year as a fixed effect models a linear change in the response with years. This does not correct for the non-independence between data collected on a given year.
summary(lm(babies ~ 1 + year, data = data))
##
## Call:
## lm(formula = babies ~ 1 + year, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6103 -1.4083 -0.1328 1.1969 5.3491
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 986.31562 34.83814 28.311 < 2e-16 ***
## year 0.10678 0.01734 6.158 3.58e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.86 on 215 degrees of freedom
## Multiple R-squared: 0.1499, Adjusted R-squared: 0.146
## F-statistic: 37.92 on 1 and 215 DF, p-value: 3.579e-09
summary(lm(babies ~ 1 + storks, data = data))
##
## Call:
## lm(formula = babies ~ 1 + storks, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0552 -1.3745 -0.0409 1.1854 4.7969
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.200e+03 1.612e-01 7445.831 < 2e-16 ***
## storks 4.066e-03 1.007e-03 4.037 7.53e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.945 on 215 degrees of freedom
## Multiple R-squared: 0.07046, Adjusted R-squared: 0.06614
## F-statistic: 16.3 on 1 and 215 DF, p-value: 7.526e-05
summary(lm(babies ~ 1 + storks + year, data = data))
##
## Call:
## lm(formula = babies ~ 1 + storks + year, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6607 -1.4066 -0.1324 1.1480 5.4436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.618e+02 5.282e+01 18.208 < 2e-16 ***
## storks -9.056e-04 1.463e-03 -0.619 0.537
## year 1.190e-01 2.634e-02 4.519 1.03e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.863 on 214 degrees of freedom
## Multiple R-squared: 0.1514, Adjusted R-squared: 0.1435
## F-statistic: 19.1 on 2 and 214 DF, p-value: 2.341e-08
Year as a random effect models differences among years, without considering that years come in a specific order. The random effect correct the estimation of the fixed effect for non-independence between data collected on a give year. However, the random effect does not correct for linear changes in other fixed effects with time.
summary(lmer(babies ~ 1 + (1|year), data = data))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: babies ~ 1 + (1 | year)
## Data: data
##
## REML criterion at convergence: 850.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.40554 -0.61816 -0.05888 0.64060 3.08569
##
## Random effects:
## Groups Name Variance Std.Dev.
## year (Intercept) 1.942 1.394
## Residual 2.293 1.514
## Number of obs: 217, groups: year, 29
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1200.631 0.284 26.870 4228 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lmer(babies ~ 1 + storks + (1|year), data = data))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: babies ~ 1 + storks + (1 | year)
## Data: data
##
## REML criterion at convergence: 857.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4089 -0.6134 -0.1086 0.6304 2.8922
##
## Random effects:
## Groups Name Variance Std.Dev.
## year (Intercept) 1.672 1.293
## Residual 2.296 1.515
## Number of obs: 217, groups: year, 29
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.200e+03 3.127e-01 2.748e+01 3838.79 <2e-16 ***
## storks 4.473e-03 2.140e-03 2.570e+01 2.09 0.0467 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## storks -0.522
Sometimes you need both. Year can be both a fixed and a random effect, meaning we want to model a linear change, and the dependence between data collected on the same years.
summary(lmer(babies ~ 1 + storks + year + (1|year) , data = data))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: babies ~ 1 + storks + year + (1 | year)
## Data: data
##
## REML criterion at convergence: 856
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4804 -0.6266 -0.0410 0.6304 3.0011
##
## Random effects:
## Groups Name Variance Std.Dev.
## year (Intercept) 1.351 1.162
## Residual 2.296 1.515
## Number of obs: 217, groups: year, 29
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.827e+02 8.868e+01 3.226e+01 11.081 1.56e-12 ***
## storks -5.718e-04 2.843e-03 2.685e+01 -0.201 0.8421
## year 1.086e-01 4.426e-02 3.222e+01 2.454 0.0197 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) storks
## storks 0.723
## year -1.000 -0.724
Try with the roo dataset:
summary(lm(EscapeDistance ~ Tail_Length + Year , data = roo))
##
## Call:
## lm(formula = EscapeDistance ~ Tail_Length + Year, data = roo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.101 -4.024 -0.593 3.493 36.124
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.389e+03 1.100e+02 -12.63 <2e-16 ***
## Tail_Length 1.086e+00 1.695e-02 64.09 <2e-16 ***
## Year 6.802e-01 5.467e-02 12.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.917 on 2453 degrees of freedom
## Multiple R-squared: 0.6269, Adjusted R-squared: 0.6266
## F-statistic: 2061 on 2 and 2453 DF, p-value: < 2.2e-16
summary(lmer(EscapeDistance ~Tail_Length + (1|Year), data = roo))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EscapeDistance ~ Tail_Length + (1 | Year)
## Data: roo
##
## REML criterion at convergence: 15630
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6300 -0.6676 -0.0983 0.5826 6.0700
##
## Random effects:
## Groups Name Variance Std.Dev.
## Year (Intercept) 4.108 2.027
## Residual 33.561 5.793
## Number of obs: 2456, groups: Year, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -25.52373 1.18717 48.52328 -21.50 <2e-16 ***
## Tail_Length 1.14330 0.01763 2451.96379 64.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Tail_Length -0.789
summary(lmer(EscapeDistance ~Tail_Length + Year + (1|Year), data = roo))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EscapeDistance ~ Tail_Length + Year + (1 | Year)
## Data: roo
##
## REML criterion at convergence: 15627.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6240 -0.6688 -0.0988 0.5875 6.0857
##
## Random effects:
## Groups Name Variance Std.Dev.
## Year (Intercept) 2.68 1.637
## Residual 33.56 5.793
## Number of obs: 2456, groups: Year, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -1.122e+03 5.206e+02 5.822e+00 -2.155 0.0760 .
## Tail_Length 1.142e+00 1.761e-02 2.436e+03 64.867 <2e-16 ***
## Year 5.456e-01 2.591e-01 5.822e+00 2.106 0.0812 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Tl_Lng
## Tail_Length -0.011
## Year -1.000 0.009
Lme4 summary does not show any measure of uncertainty in random effect variances. How do we know whether a random effect is statistically significant, or what range of values are likely?
You can test a random effect using a Likelihood Ratio Test that compares a model with and without the random effect. The models need to be nested, which means one model is a subset of the other one.
mm0 <- lmer(EscapeDistance ~ Age*Sex + (1|Year), data = roo)
m0 <- lm(EscapeDistance ~ Age*Sex, data = roo)
anova(mm0, m0)
## refitting model(s) with ML (instead of REML)
## Data: roo
## Models:
## m0: EscapeDistance ~ Age * Sex
## mm0: EscapeDistance ~ Age * Sex + (1 | Year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 5 15052 15082 -7521.2 15042
## mm0 6 15005 15040 -7496.3 14993 49.802 1 1.701e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mm1 <- lmer(EscapeDistance ~ Age*Sex + (1|Year) + (1|id), data = roo)
mm2 <- lmer(EscapeDistance ~ Age*Sex + (1|Year), data = roo)
mm3 <- lmer(EscapeDistance ~ Age*Sex + (1|id), data = roo)
anova(mm1, mm2)
## refitting model(s) with ML (instead of REML)
## Data: roo
## Models:
## mm2: EscapeDistance ~ Age * Sex + (1 | Year)
## mm1: EscapeDistance ~ Age * Sex + (1 | Year) + (1 | id)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mm2 6 15005 15040 -7496.3 14993
## mm1 7 14584 14625 -7285.2 14570 422.24 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mm1,mm3)
## refitting model(s) with ML (instead of REML)
## Data: roo
## Models:
## mm3: EscapeDistance ~ Age * Sex + (1 | id)
## mm1: EscapeDistance ~ Age * Sex + (1 | Year) + (1 | id)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mm3 6 14629 14664 -7308.6 14617
## mm1 7 14584 14625 -7285.2 14570 46.837 1 7.713e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Estimate confidence intervals of standard deviation parameters:
confint(mm1)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 2.9833569 3.583530
## .sig02 0.5156091 1.666197
## .sigma 3.8752567 4.151607
## (Intercept) 38.9228850 40.642918
## AgeYoung -15.4480436 -14.229890
## SexMale 2.9932686 4.572653
## AgeYoung:SexMale -3.8330801 -2.037551
.sig01 is the standard deviation for the first random effect, .sig02 for the second random effect, .sigma for the residual standard deviation.
What about uncertainty in the random level predictions produced by ranef() ? You can’t with lme4, and it is very difficult in general with maximum likelihood methods. You need to go Bayesian (for instance with MCMCglmm), but we won’t cover that here.