Load required packages.
library(ggplot2)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.1.3
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Load our eyetracking data.
data <- read.csv('data-eyetracking.csv')
Note: In this tutorial, we will do by-subjects time analyses only (aggregating across trials within-subjects) for simplicity but you can do by-items/trials analyses in the same way if you aggregate within trials instead of subjects.
Let’s prep these data for analysis much like we prepped for the empirical logit analysis.
# rescale, bin, aggregate, and transform our DV's all in one step
binned <- data %>%
mutate(TimeS = TimeFromSubphaseOnset / 1000,
Bin = TimeFromSubphaseOnset %/% 50) %>% # re-scale, bin
group_by(ParticipantName,Target,Bin) %>% # aggregate within bins
summarise(PropAnimal = mean(Animate), y = sum(Animate), N = length(Animate), TimeS = min(TimeS)) %>%
mutate(elog = log( (y + .5) / (N - y + .5) ), # empirical logit
wts = 1/(y + .5) + 1/(N - y + .5), # optional weights
Arcsin = asin(sqrt(PropAnimal))) %>% # arcsin-sqrt
ungroup()
Here’s a model similar to the linear model we had before :
binned$TargetC <- ifelse(binned$Target == 'Animal', .5, -.5)
binned$TargetC <- scale(binned$TargetC, center=T, scale=F)
model <- lmer(elog ~ TargetC*TimeS + (1 + TargetC + TimeS | ParticipantName), data = binned)
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ TargetC * TimeS + (1 + TargetC + TimeS | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 23919.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.92264 -0.79136 0.08027 0.73641 2.52996
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.54268 0.7367
## TargetC 1.16610 1.0799 -0.08
## TimeS 0.06879 0.2623 -0.76 0.00
## Residual 2.18646 1.4787
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.22080 0.13930 8.764
## TargetC 2.33756 0.21006 11.128
## TimeS -0.24036 0.04926 -4.880
## TargetC:TimeS -0.15481 0.02302 -6.725
##
## Correlation of Fixed Effects:
## (Intr) TargtC TimeS
## TargetC -0.075
## TimeS -0.762 -0.002
## TargetC:TmS 0.000 -0.297 0.001
Visualize the data and model fit:
ggplot(binned, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point") +
stat_summary(aes(y=predict(model,binned,re.form=NA)), fun.y=mean, geom="line")
We already know that this model isn’t a great fit to our data. By assuming linear growth, it gives us weird estimates, one major one being that it thinks our two groups differ at timepoint 0 (which we know not to be true – the differences emerge over time).
Let’s do our first stab at examining non-linear growth by creating and entering natural polynomials into the model.
binned <- binned %>%
mutate(TimeS_2 = TimeS^2,
TimeS_3 = TimeS^3,
TimeS_4 = TimeS^4)
head(binned)
## Source: local data frame [6 x 14]
##
## ParticipantName Target Bin PropAnimal y N TimeS elog wts
## 1 ANCAT139 Animal 1 1 3 3 0.05 1.94591 2.285714
## 2 ANCAT139 Animal 2 1 3 3 0.10 1.94591 2.285714
## 3 ANCAT139 Animal 3 1 3 3 0.15 1.94591 2.285714
## 4 ANCAT139 Animal 4 1 3 3 0.20 1.94591 2.285714
## 5 ANCAT139 Animal 5 1 3 3 0.25 1.94591 2.285714
## 6 ANCAT139 Animal 6 1 3 3 0.30 1.94591 2.285714
## Variables not shown: Arcsin (dbl), TargetC (dbl), TimeS_2 (dbl), TimeS_3
## (dbl), TimeS_4 (dbl)
ggplot(binned, aes(x=TimeS, y=TimeS)) +
geom_point() +
geom_point(aes(y=TimeS_2), color='red') +
geom_point(aes(y=TimeS_3), color='blue') +
geom_point(aes(y=TimeS_4), color='green')
Each natural polynomial accelerates at a different rate. These have an interesting property in that, when combined, they capture the correlates/estimated slopes of a variable at successive “bends” in the data. i.e., The 2nd (quadratic) polynomial will capture the first bend, the 3rd (cubic) polynomial will capture the second bend, and so on.
In this way, models that include these natural polynomials can model non-linear growth.
model <- lmer(elog ~ Target*(TimeS + TimeS_2 + TimeS_3 + TimeS_4) + (1 + Target + TimeS + TimeS_2 + TimeS_3 + TimeS_4 | ParticipantName), data = binned)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp),
## : convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 1.29175
## (tol = 0.002, component 16)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge: degenerate Hessian with 1
## negative eigenvalues
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ Target * (TimeS + TimeS_2 + TimeS_3 + TimeS_4) + (1 +
## Target + TimeS + TimeS_2 + TimeS_3 + TimeS_4 | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 22154
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.02884 -0.74205 0.01878 0.67663 3.03478
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 1.430660 1.19610
## TargetArtefact 1.096841 1.04730 -0.24
## TimeS 5.137607 2.26663 -0.30 0.08
## TimeS_2 3.174713 1.78177 0.03 0.02 -0.58
## TimeS_3 0.337531 0.58097 0.09 -0.02 0.41 -0.93
## TimeS_4 0.003629 0.06024 -0.14 0.01 -0.36 0.78
## Residual 1.564735 1.25089
##
##
##
##
##
##
## -0.95
##
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.642371 0.241679 6.796
## TargetArtefact 0.750439 0.240789 3.117
## TimeS 2.123542 0.490856 4.326
## TimeS_2 -1.753652 0.380356 -4.611
## TimeS_3 0.439360 0.119095 3.689
## TimeS_4 -0.035731 0.012042 -2.967
## TargetArtefact:TimeS -7.295167 0.372474 -19.586
## TargetArtefact:TimeS_2 4.368586 0.277716 15.730
## TargetArtefact:TimeS_3 -0.935575 0.076203 -12.277
## TargetArtefact:TimeS_4 0.066896 0.006888 9.711
##
## Correlation of Fixed Effects:
## (Intr) TrgtAr TimeS TimS_2 TimS_3 TimS_4 TrA:TS TA:TS_2
## TargtArtfct -0.353
## TimeS -0.428 0.248
## TimeS_2 0.186 -0.150 -0.685
## TimeS_3 -0.048 0.108 0.529 -0.938
## TimeS_4 -0.018 -0.091 -0.469 0.811 -0.957
## TrgtArtf:TS 0.259 -0.519 -0.381 0.355 -0.294 0.248
## TrgtAr:TS_2 -0.221 0.441 0.369 -0.367 0.318 -0.276 -0.967
## TrgtAr:TS_3 0.194 -0.389 -0.349 0.363 -0.323 0.286 0.913 -0.985
## TrgtAr:TS_4 -0.176 0.351 0.329 -0.352 0.321 -0.289 -0.861 0.957
## TA:TS_3
## TargtArtfct
## TimeS
## TimeS_2
## TimeS_3
## TimeS_4
## TrgtArtf:TS
## TrgtAr:TS_2
## TrgtAr:TS_3
## TrgtAr:TS_4 -0.992
We have some convergence issues caused by overparameterization, so let’s scale back on these polynomials to something that seems more reasonable. A good rule of thumb is to count the number of “bends” in the data before breaking it down by condition.
How many bends do we see?
ggplot(binned, aes(x=TimeS, y=elog)) +
stat_smooth(method="loess")
There looks to be two bends in the data which, because of the N-1 relationship between polynomials and bends, means we should include 3 polynomial terms.
model <- lmer(elog ~ Target*(TimeS + TimeS_2 + TimeS_3) + (1 + Target + TimeS + TimeS_2 + TimeS_3 | ParticipantName), data = binned)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp),
## : convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.341975
## (tol = 0.002, component 3)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## elog ~ Target * (TimeS + TimeS_2 + TimeS_3) + (1 + Target + TimeS +
## TimeS_2 + TimeS_3 | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 22477.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8434 -0.7320 0.0330 0.6972 3.0251
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 1.47588 1.2149
## TargetArtefact 1.17655 1.0847 -0.38
## TimeS 4.01804 2.0045 -0.55 0.14
## TimeS_2 0.96055 0.9801 0.32 -0.20 -0.94
## TimeS_3 0.01518 0.1232 -0.20 0.23 0.87 -0.98
## Residual 1.69721 1.3028
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.08370 0.23867 8.731
## TargetArtefact -0.07437 0.23390 -0.318
## TimeS 0.44895 0.39189 1.146
## TimeS_2 -0.36833 0.18866 -1.952
## TimeS_3 0.04626 0.02362 1.959
## TargetArtefact:TimeS -4.16823 0.19737 -21.119
## TargetArtefact:TimeS_2 1.78000 0.08393 21.209
## TargetArtefact:TimeS_3 -0.20031 0.01006 -19.909
##
## Correlation of Fixed Effects:
## (Intr) TrgtAr TimeS TimS_2 TimS_3 TrA:TS TA:TS_2
## TargtArtfct -0.435
## TimeS -0.595 0.226
## TimeS_2 0.369 -0.248 -0.944
## TimeS_3 -0.251 0.260 0.876 -0.985
## TrgtArtf:TS 0.225 -0.457 -0.254 0.217 -0.197
## TrgtAr:TS_2 -0.192 0.390 0.246 -0.225 0.213 -0.967
## TrgtAr:TS_3 0.170 -0.345 -0.233 0.222 -0.217 0.914 -0.986
There is still a convergence error, likely because the scales of our variables are too different, like before. We will ignore this for now, because our final GCA approach will fix this.
In the mean time, let’s see what this model did for us. Looking at the estimates above, it seems to have gotten rid of our timepoint 0 main effect of Target (yes!), and instead shows some strong interactions over time. This seems promising… but let’s visualize.
ggplot(binned, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point") +
stat_summary(aes(y=predict(model,binned,re.form=NA)), fun.y=mean, geom="line")
Awesome! Much better.
Let’s store this model because are going to use it for comparison later.
natural_model <- model
We just did our first non-linear growth curve analysis, but it was sub-optimal for two reasons:
Thankfully, we have something that will help: orthogonal polynomials.
Let’s first consider problem (1): our natural polynomial are highly correlated with each other. Here you can see this in a standard correlation matrix.
cor(binned[, c('TimeS','TimeS_2','TimeS_3','TimeS_4')])
## TimeS TimeS_2 TimeS_3 TimeS_4
## TimeS 1.0000000 0.9673950 0.9148202 0.8637499
## TimeS_2 0.9673950 1.0000000 0.9858194 0.9578085
## TimeS_3 0.9148202 0.9858194 1.0000000 0.9920735
## TimeS_4 0.8637499 0.9578085 0.9920735 1.0000000
This is not a good thing when we are trying to attribute variance to each factor independently. This isn’t unique to time-based models – any linear model suffers from multicollinearity.
So, what we can do is actually create replacement timecodes for linear, quadratic, cubic, etc. change over time that we know by design will be uncorrelated.
poly()
will generate higher-order polynomials for us, with a vector length equivalent to the length of our original time vector.
We’ll go up to 6th-order polynomials, but we’ll stick to the first 3 for most of our models.
orthogonal_polynomials <- poly(sort(as.vector(unique(binned$TimeS))), 6)
head(orthogonal_polynomials)
## 1 2 3 4 5 6
## [1,] -0.1522008 0.1965879 -0.2280843 0.25890514 -0.28163291 0.29210413
## [2,] -0.1495809 0.1862187 -0.2038997 0.21335606 -0.20920324 0.18790281
## [3,] -0.1469609 0.1760350 -0.1808064 0.17149897 -0.14581956 0.10238653
## [4,] -0.1443409 0.1660370 -0.1587845 0.13317643 -0.09082299 0.03350625
## [5,] -0.1417209 0.1562246 -0.1378139 0.09823392 -0.04358204 -0.02064481
## [6,] -0.1391009 0.1465978 -0.1178747 0.06651981 -0.00349211 -0.06183707
Column 1 grows linearly. Column 2 grows quadratically. Column 3 grows cubicly… etc.
Visualize this, and verify that they are indeed uncorrelated.
ggplot(data.frame(orthogonal_polynomials), aes(x=X1, y=X1)) +
geom_point() +
geom_point(aes(y=X2), color='red') +
geom_point(aes(y=X3), color='blue') +
geom_point(aes(y=X4), color='green') +
geom_point(aes(y=X5), color='purple') +
geom_point(aes(y=X6), color='yellow')
cor(orthogonal_polynomials[, c(1:6)])
## 1 2 3 4 5
## 1 1.000000e+00 -3.746257e-17 -6.884345e-17 6.189609e-17 5.817083e-17
## 2 -3.746257e-17 1.000000e+00 -5.529431e-18 -1.469433e-17 -1.280307e-16
## 3 -6.884345e-17 -5.529431e-18 1.000000e+00 2.886349e-16 3.596163e-17
## 4 6.189609e-17 -1.469433e-17 2.886349e-16 1.000000e+00 -1.673568e-16
## 5 5.817083e-17 -1.280307e-16 3.596163e-17 -1.673568e-16 1.000000e+00
## 6 2.201879e-16 -1.657135e-17 -2.105385e-17 -8.550967e-17 -7.173353e-17
## 6
## 1 2.201879e-16
## 2 -1.657135e-17
## 3 -2.105385e-17
## 4 -8.550967e-17
## 5 -7.173353e-17
## 6 1.000000e+00
round(cor(orthogonal_polynomials[, c(1:6)]),5)
## 1 2 3 4 5 6
## 1 1 0 0 0 0 0
## 2 0 1 0 0 0 0
## 3 0 0 1 0 0 0
## 4 0 0 0 1 0 0
## 5 0 0 0 0 1 0
## 6 0 0 0 0 0 1
Perfect!
I like to merge them into the original dataframe using this technique, which allows for missing data from any given participant.
time_codes <- data.frame(
sort(as.vector(unique(binned$TimeS))),
orthogonal_polynomials[, c(1:6)]
)
colnames(time_codes) <- c('TimeS','ot1','ot2','ot3','ot4','ot5','ot6')
binned <- merge(binned, time_codes, by='TimeS')
Now let’s model our data exactly like we did before but using these orthogonal polynomials:
model <- lmer(elog ~ TargetC*(ot1 + ot2 + ot3) + (1 + TargetC + ot1 + ot2 + ot3 | ParticipantName), data = binned)
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ TargetC * (ot1 + ot2 + ot3) + (1 + TargetC + ot1 + ot2 +
## ot3 | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 22435.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.83455 -0.73074 0.03328 0.69541 3.02566
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.2764 0.5257
## TargetC 1.1894 1.0906 -0.12
## ot1 25.4028 5.0401 0.40 -0.01
## ot2 21.8758 4.6772 0.09 -0.19 -0.07
## ot3 20.1808 4.4923 0.02 -0.17 -0.59 0.12
## Residual 1.6970 1.3027
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.53405 0.09738 5.484
## TargetC 1.89048 0.20180 9.368
## ot1 -4.36032 0.94073 -4.635
## ot2 1.94414 0.87515 2.221
## ot3 -2.04657 0.84179 -2.431
## TargetC:ot1 -3.10083 0.39068 -7.937
## TargetC:ot2 -2.90937 0.38230 -7.610
## TargetC:ot3 7.52262 0.37782 19.910
##
## Correlation of Fixed Effects:
## (Intr) TargtC ot1 ot2 ot3 TrgC:1 TrgC:2
## TargetC -0.114
## ot1 0.392 -0.007
## ot2 0.090 -0.183 -0.057
## ot3 0.022 -0.168 -0.557 0.117
## TargetC:ot1 0.002 0.021 0.001 0.001 0.000
## TargetC:ot2 0.000 0.009 0.001 0.002 0.003 0.114
## TargetC:ot3 -0.001 0.002 0.000 0.003 0.002 0.026 0.088
Great fit! No errors.
Interestingly, we are back to seeing a main effect of TargetArtefact
, though… Why? The reason is simple: Our natural polynomials all started at Timepoint 0, meaning that main effects represented differences at the start of the time window. In contrast, orthogonal polynomials are, by default, centered at 0, meaning that main effects represent average differences (across time) between levels of a factor.
Let’s visualize our data and model fit:
ggplot(binned, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point") +
stat_summary(aes(y=predict(model,binned,re.form=NA)), fun.y=mean, geom="line")
Compare this model to our natural polynomial model.
summary(natural_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## elog ~ Target * (TimeS + TimeS_2 + TimeS_3) + (1 + Target + TimeS +
## TimeS_2 + TimeS_3 | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 22477.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8434 -0.7320 0.0330 0.6972 3.0251
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 1.47588 1.2149
## TargetArtefact 1.17655 1.0847 -0.38
## TimeS 4.01804 2.0045 -0.55 0.14
## TimeS_2 0.96055 0.9801 0.32 -0.20 -0.94
## TimeS_3 0.01518 0.1232 -0.20 0.23 0.87 -0.98
## Residual 1.69721 1.3028
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.08370 0.23867 8.731
## TargetArtefact -0.07437 0.23390 -0.318
## TimeS 0.44895 0.39189 1.146
## TimeS_2 -0.36833 0.18866 -1.952
## TimeS_3 0.04626 0.02362 1.959
## TargetArtefact:TimeS -4.16823 0.19737 -21.119
## TargetArtefact:TimeS_2 1.78000 0.08393 21.209
## TargetArtefact:TimeS_3 -0.20031 0.01006 -19.909
##
## Correlation of Fixed Effects:
## (Intr) TrgtAr TimeS TimS_2 TimS_3 TrA:TS TA:TS_2
## TargtArtfct -0.435
## TimeS -0.595 0.226
## TimeS_2 0.369 -0.248 -0.944
## TimeS_3 -0.251 0.260 0.876 -0.985
## TrgtArtf:TS 0.225 -0.457 -0.254 0.217 -0.197
## TrgtAr:TS_2 -0.192 0.390 0.246 -0.225 0.213 -0.967
## TrgtAr:TS_3 0.170 -0.345 -0.233 0.222 -0.217 0.914 -0.986
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ TargetC * (ot1 + ot2 + ot3) + (1 + TargetC + ot1 + ot2 +
## ot3 | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 22435.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.83455 -0.73074 0.03328 0.69541 3.02566
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.2764 0.5257
## TargetC 1.1894 1.0906 -0.12
## ot1 25.4028 5.0401 0.40 -0.01
## ot2 21.8758 4.6772 0.09 -0.19 -0.07
## ot3 20.1808 4.4923 0.02 -0.17 -0.59 0.12
## Residual 1.6970 1.3027
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.53405 0.09738 5.484
## TargetC 1.89048 0.20180 9.368
## ot1 -4.36032 0.94073 -4.635
## ot2 1.94414 0.87515 2.221
## ot3 -2.04657 0.84179 -2.431
## TargetC:ot1 -3.10083 0.39068 -7.937
## TargetC:ot2 -2.90937 0.38230 -7.610
## TargetC:ot3 7.52262 0.37782 19.910
##
## Correlation of Fixed Effects:
## (Intr) TargtC ot1 ot2 ot3 TrgC:1 TrgC:2
## TargetC -0.114
## ot1 0.392 -0.007
## ot2 0.090 -0.183 -0.057
## ot3 0.022 -0.168 -0.557 0.117
## TargetC:ot1 0.002 0.021 0.001 0.001 0.000
## TargetC:ot2 0.000 0.009 0.001 0.002 0.003 0.114
## TargetC:ot3 -0.001 0.002 0.000 0.003 0.002 0.026 0.088
We can use the same methods as before to get confidence intervals, test for Type III significance, etc.
# confint(model)
# this takes a long with a model this complex....
drop1(model, ~., test="Chisq")
## Single term deletions
##
## Model:
## elog ~ TargetC * (ot1 + ot2 + ot3) + (1 + TargetC + ot1 + ot2 +
## ot3 | ParticipantName)
## Df AIC LRT Pr(Chi)
## <none> 22483
## TargetC 1 22523 41.80 1.012e-10 ***
## ot1 1 22497 16.61 4.592e-05 ***
## ot2 1 22486 4.72 0.02988 *
## ot3 1 22486 5.57 0.01826 *
## TargetC:ot1 1 22544 62.72 2.378e-15 ***
## TargetC:ot2 1 22538 57.69 3.075e-14 ***
## TargetC:ot3 1 22866 384.71 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1()
suggests that all of our parameters are reliable predictors.
Let’s try adding 4th and 5th orthogonal polynomials manually and seeing their effects on this model.
model_quartic <- lmer(elog ~ TargetC*(ot1 + ot2 + ot3 + ot4) + (1 + TargetC + ot1 + ot2 + ot3 + ot4 | ParticipantName), data = binned)
summary(model_quartic)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ TargetC * (ot1 + ot2 + ot3 + ot4) + (1 + TargetC + ot1 +
## ot2 + ot3 + ot4 | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 21981
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.07020 -0.73725 0.01891 0.67347 2.98970
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.2797 0.5289
## TargetC 1.1826 1.0875 -0.12
## ot1 25.8122 5.0806 0.41 0.00
## ot2 21.7316 4.6617 0.09 -0.19 -0.07
## ot3 20.9333 4.5753 -0.01 -0.19 -0.60 0.09
## ot4 14.7505 3.8406 -0.35 -0.07 -0.26 -0.34
## Residual 1.5621 1.2498
##
##
##
##
##
##
## 0.16
##
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.53420 0.09784 5.460
## TargetC 1.90190 0.20103 9.461
## ot1 -4.36103 0.94637 -4.608
## ot2 1.95793 0.87075 2.249
## ot3 -2.03754 0.85514 -2.383
## ot4 -0.10087 0.72330 -0.139
## TargetC:ot1 -3.00666 0.37521 -8.013
## TargetC:ot2 -2.88869 0.36704 -7.870
## TargetC:ot3 7.23899 0.36436 19.868
## TargetC:ot4 -3.44293 0.35421 -9.720
##
## Correlation of Fixed Effects:
## (Intr) TargtC ot1 ot2 ot3 ot4 TrgC:1 TrgC:2 TrgC:3
## TargetC -0.114
## ot1 0.397 -0.001
## ot2 0.086 -0.185 -0.062
## ot3 -0.013 -0.181 -0.570 0.092
## ot4 -0.335 -0.070 -0.250 -0.320 0.153
## TargetC:ot1 0.002 0.020 0.002 0.001 0.000 0.001
## TargetC:ot2 0.000 0.009 0.001 0.002 0.003 0.002 0.115
## TargetC:ot3 -0.001 0.002 0.000 0.003 0.003 0.003 0.026 0.090
## TargetC:ot4 0.000 -0.003 0.001 0.001 0.002 0.001 -0.005 0.011 0.093
ggplot(binned, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point") +
stat_summary(aes(y=predict(model,binned,re.form=NA)), fun.y=mean, geom="line", linetype='dashed') + # 3rd-order model
stat_summary(aes(y=predict(model_quartic,binned,re.form=NA)), fun.y=mean, geom="line") # 4th-order model
anova(model, model_quartic)
## refitting model(s) with ML (instead of REML)
## Data: binned
## Models:
## model: elog ~ TargetC * (ot1 + ot2 + ot3) + (1 + TargetC + ot1 + ot2 +
## model: ot3 | ParticipantName)
## model_quartic: elog ~ TargetC * (ot1 + ot2 + ot3 + ot4) + (1 + TargetC + ot1 +
## model_quartic: ot2 + ot3 + ot4 | ParticipantName)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model 24 22483 22646 -11217 22435
## model_quartic 32 22045 22262 -10990 21981 453.79 8 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Despite the very underwhelming difference in model fits, model comparison says that ot4
is a significant predictor.
One possibility is that this significant difference is because we not only added ot4
as a fixed effect, we also added it to the random structure. When examining the influence of a fixed effect, it’s best to keep your random effect structure constant. The fact that we changed our random structure is why the anova()
says that our models differ by 8 degrees of freedom when we only wanted to see the influence of the main effect of ot4
and its interaction with TargetC
(i.e., two parameters or 2 df difference).
model_cubic <- lmer(elog ~ TargetC*(ot1 + ot2 + ot3) + (1 + TargetC + ot1 + ot2 + ot3 + ot4 | ParticipantName), data = binned, REML=F)
model_quartic <- lmer(elog ~ TargetC*(ot1 + ot2 + ot3 + ot4) + (1 + TargetC + ot1 + ot2 + ot3 + ot4 | ParticipantName), data = binned, REML=F)
anova(model_cubic, model_quartic)
## Data: binned
## Models:
## model_cubic: elog ~ TargetC * (ot1 + ot2 + ot3) + (1 + TargetC + ot1 + ot2 +
## model_cubic: ot3 + ot4 | ParticipantName)
## model_quartic: elog ~ TargetC * (ot1 + ot2 + ot3 + ot4) + (1 + TargetC + ot1 +
## model_quartic: ot2 + ot3 + ot4 | ParticipantName)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model_cubic 30 22135 22338 -11037 22075
## model_quartic 32 22045 22262 -10990 21981 93.869 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Despite the underwhelming difference in visualized model fits, it still says it’s a significantly better fit.
What about a 5th polynomial?
model_quartic <- lmer(elog ~ TargetC*(ot1 + ot2 + ot3 + ot4) + (1 + TargetC + ot1 + ot2 + ot3 + ot4 + ot5 | ParticipantName), data = binned, REML=F)
model_quintic <- lmer(elog ~ TargetC*(ot1 + ot2 + ot3 + ot4 + ot5) + (1 + TargetC + ot1 + ot2 + ot3 + ot4 + ot5 | ParticipantName), data = binned, REML=F)
ggplot(binned, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point") +
stat_summary(aes(y=predict(model_quartic,binned,re.form=NA)), fun.y=mean, geom="line", linetype='dashed') + # 4th-order model
stat_summary(aes(y=predict(model_quintic,binned,re.form=NA)), fun.y=mean, geom="line") # 5th-order model
anova(model_quartic, model_quintic)
## Data: binned
## Models:
## model_quartic: elog ~ TargetC * (ot1 + ot2 + ot3 + ot4) + (1 + TargetC + ot1 +
## model_quartic: ot2 + ot3 + ot4 + ot5 | ParticipantName)
## model_quintic: elog ~ TargetC * (ot1 + ot2 + ot3 + ot4 + ot5) + (1 + TargetC +
## model_quintic: ot1 + ot2 + ot3 + ot4 + ot5 | ParticipantName)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model_quartic 39 21847 22111 -10884 21769
## model_quintic 41 21849 22127 -10883 21767 1.7237 2 0.4224
Adding a 5th polynomial did not improve our fit.
Here’s a final reminder of how bad our linear model was:
model_linear <- lmer(elog ~ TargetC*(ot1) + (1 + TargetC + ot1 | ParticipantName), data = binned)
summary(model_linear)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ TargetC * (ot1) + (1 + TargetC + ot1 | ParticipantName)
## Data: binned
##
## REML criterion at convergence: 23908
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.92264 -0.79136 0.08027 0.73641 2.52996
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.2753 0.5247
## TargetC 1.1661 1.0799 -0.12
## ot1 25.0545 5.0054 0.39 0.00
## Residual 2.1865 1.4787
## Number of obs: 6526, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.52266 0.09757 5.357
## TargetC 1.88789 0.20061 9.411
## ot1 -4.58695 0.93999 -4.880
## TargetC:ot1 -2.95447 0.43935 -6.725
##
## Correlation of Fixed Effects:
## (Intr) TargtC ot1
## TargetC -0.114
## ot1 0.379 -0.002
## TargetC:ot1 0.002 0.022 0.001
ggplot(binned, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point") +
stat_summary(aes(y=predict(model_quartic,binned,re.form=NA)), fun.y=mean, geom="line", linetype='dashed') + # 3rd-order model
stat_summary(aes(y=predict(model_linear,binned,re.form=NA)), fun.y=mean, geom="line") # 2nd-order model
I like to design experiments with only 2 levels per factor for simplicity but sometimes we have 3 levels in a factor and, now, main effects do not equal simple effects.
To demonstrate, let’s add a third “Neutral” Target level that will follow a similar trajectory to the “Animal” level but shifted down towards 50% chance.
new_condition <- binned[which(binned$Target == 'Animal'), ]
new_condition$Target <- 'Neutral'
#new_condition$y <- new_condition$y - round(new_condition$N / 3)
new_condition$y <- new_condition$y + round(rnorm(length(new_condition$y),-.5,2))
new_condition$y <- ifelse(new_condition$y > new_condition$N,new_condition$N,new_condition$y)
new_condition[which(new_condition$y < 1), 'y'] <- 1
new_condition$PropAnimal <- new_condition$y / new_condition$N
new_condition$elog <- log( (new_condition$y) / (new_condition$N - new_condition$y + .5) )
new_condition$wts <- 1/(new_condition$y + .5) + 1/(new_condition$N - new_condition$y + .5)
new_condition$Arcsin <- asin(sqrt(new_condition$PropAnimal))
binned_3levels <- rbind(binned,new_condition)
binned_3levels$Target <- factor(binned_3levels$Target)
ggplot(binned_3levels, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point")
Fit a model with Target treatment-coded.
model <- lmer(elog ~ Target*(ot1 + ot2 + ot3) + (1 + Target + ot1 + ot2 + ot3 | ParticipantName), data = binned_3levels)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp),
## : convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.750165
## (tol = 0.002, component 21)
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ Target * (ot1 + ot2 + ot3) + (1 + Target + ot1 + ot2 +
## ot3 | ParticipantName)
## Data: binned_3levels
##
## REML criterion at convergence: 33287.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6196 -0.7165 -0.0016 0.7275 3.1674
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.51151 0.7152
## TargetArtefact 1.19164 1.0916 -0.68
## TargetNeutral 0.03302 0.1817 -0.93 0.54
## ot1 19.05642 4.3654 0.38 -0.02 -0.54
## ot2 21.77520 4.6664 -0.09 0.24 -0.17 0.02
## ot3 17.99780 4.2424 -0.13 0.19 0.35 -0.57
## Residual 1.67853 1.2956
##
##
##
##
##
##
## 0.05
##
## Number of obs: 9770, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.48347 0.13262 11.186
## TargetArtefact -1.88983 0.20194 -9.358
## TargetNeutral -0.55324 0.04640 -11.923
## ot1 -5.93587 0.84346 -7.038
## ot2 0.46424 0.89385 0.519
## ot3 1.73069 0.81955 2.112
## TargetArtefact:ot1 3.11478 0.38802 8.027
## TargetNeutral:ot1 1.76129 0.38716 4.549
## TargetArtefact:ot2 2.91036 0.37988 7.661
## TargetNeutral:ot2 -0.09747 0.37988 -0.257
## TargetArtefact:ot3 -7.51567 0.37544 -20.018
## TargetNeutral:ot3 -0.37769 0.37562 -1.006
##
## Correlation of Fixed Effects:
## (Intr) TrgtAr TrgtNt ot1 ot2 ot3 TrgA:1 TrgN:1 TrgA:2
## TargtArtfct -0.681
## TargetNetrl -0.738 0.435
## ot1 0.358 -0.026 -0.383
## ot2 -0.085 0.228 -0.121 0.027
## ot3 -0.119 0.177 0.237 -0.507 0.057
## TrgtArtfc:1 -0.017 0.020 0.043 -0.233 -0.025 -0.006
## TrgtNtrl:t1 -0.015 0.010 0.085 -0.230 -0.024 -0.006 0.500
## TrgtArtfc:2 -0.007 0.009 0.018 -0.027 -0.215 -0.023 0.113 0.056
## TrgtNtrl:t2 -0.006 0.004 0.037 -0.026 -0.213 -0.020 0.056 0.111 0.500
## TrgtArtfc:3 -0.001 0.002 0.005 -0.006 -0.021 -0.233 0.026 0.012 0.087
## TrgtNtrl:t3 -0.002 0.001 0.011 -0.006 -0.019 -0.229 0.012 0.025 0.044
## TrgN:2 TrgA:3
## TargtArtfct
## TargetNetrl
## ot1
## ot2
## ot3
## TrgtArtfc:1
## TrgtNtrl:t1
## TrgtArtfc:2
## TrgtNtrl:t2
## TrgtArtfc:3 0.044
## TrgtNtrl:t3 0.088 0.501
ggplot(binned_3levels, aes(x=TimeS, y=elog, color=Target)) +
stat_summary(fun.y=mean, geom="point") +
stat_summary(aes(y=predict(model,binned_3levels,re.form=NA)), fun.y=mean, geom="line")
In order to know how to interpret these simple effects, we need to remember which is our reference level.
Get main effects via model comparison…
model_null <- lmer(elog ~ Target*(ot1 + ot2) + ot3 + (1 + Target + ot1 + ot2 + ot3 | ParticipantName), data = binned_3levels)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.036918
## (tol = 0.002, component 14)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge: degenerate Hessian with 2
## negative eigenvalues
summary(model_null)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ Target * (ot1 + ot2) + ot3 + (1 + Target + ot1 + ot2 +
## ot3 | ParticipantName)
## Data: binned_3levels
##
## REML criterion at convergence: 33787.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6470 -0.7335 0.0212 0.7278 3.3502
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.50708 0.7121
## TargetArtefact 1.18604 1.0891 -0.68
## TargetNeutral 0.02826 0.1681 -0.99 0.57
## ot1 19.63499 4.4311 0.36 0.00 -0.41
## ot2 21.63718 4.6516 -0.13 0.27 0.09 0.02
## ot3 18.27615 4.2751 -0.12 0.18 0.10 -0.59
## Residual 1.76795 1.3296
##
##
##
##
##
##
## 0.05
##
## Number of obs: 9770, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.48072 0.13217 11.203
## TargetArtefact -1.87922 0.20162 -9.321
## TargetNeutral -0.55160 0.04528 -12.183
## ot1 -6.01756 0.85715 -7.020
## ot2 0.17904 0.89320 0.200
## ot3 -0.96946 0.79652 -1.217
## TargetArtefact:ot1 3.31958 0.39807 8.339
## TargetNeutral:ot1 1.77630 0.39721 4.472
## TargetArtefact:ot2 3.56848 0.38837 9.188
## TargetNeutral:ot2 -0.06304 0.38835 -0.162
##
## Correlation of Fixed Effects:
## (Intr) TrgtAr TrgtNt ot1 ot2 ot3 TrgA:1 TrgN:1 TrgA:2
## TargtArtfct -0.679
## TargetNetrl -0.753 0.440
## ot1 0.345 -0.010 -0.281
## ot2 -0.114 0.252 0.049 0.032
## ot3 -0.114 0.173 0.066 -0.541 0.048
## TrgtArtfc:1 -0.017 0.021 0.045 -0.235 -0.025 0.000
## TrgtNtrl:t1 -0.015 0.010 0.089 -0.232 -0.024 0.000 0.499
## TrgtArtfc:2 -0.007 0.009 0.019 -0.027 -0.220 -0.002 0.111 0.055
## TrgtNtrl:t2 -0.007 0.004 0.038 -0.025 -0.218 0.000 0.055 0.109 0.500
anova(model,model_null)
## refitting model(s) with ML (instead of REML)
## Warning in optwrap(optimizer, devfun, x@theta, lower = x@lower,
## calc.derivs = TRUE): convergence code 1 from bobyqa: bobyqa -- maximum
## number of function evaluations exceeded
## Warning in optwrap(optimizer, devfun, x@theta, lower = x@lower,
## calc.derivs = TRUE): convergence code 1 from bobyqa: bobyqa -- maximum
## number of function evaluations exceeded
## Data: binned_3levels
## Models:
## model_null: elog ~ Target * (ot1 + ot2) + ot3 + (1 + Target + ot1 + ot2 +
## model_null: ot3 | ParticipantName)
## model: elog ~ Target * (ot1 + ot2 + ot3) + (1 + Target + ot1 + ot2 +
## model: ot3 | ParticipantName)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model_null 32 33840 34070 -16888 33776
## model 34 33348 33592 -16640 33280 496.69 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Get simple effects by re-ordering factor levels.
levels(binned_3levels$Target)
## [1] "Animal" "Artefact" "Neutral"
binned_3levels$Target <- factor(binned_3levels$Target,c('Neutral','Animal','Artefact'))
levels(binned_3levels$Target)
## [1] "Neutral" "Animal" "Artefact"
model <- lmer(elog ~ Target*(ot1 + ot2 + ot3) + (1 + Target + ot1 + ot2 + ot3 | ParticipantName), data = binned_3levels)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0255175
## (tol = 0.002, component 13)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge: degenerate Hessian with 1
## negative eigenvalues
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: elog ~ Target * (ot1 + ot2 + ot3) + (1 + Target + ot1 + ot2 +
## ot3 | ParticipantName)
## Data: binned_3levels
##
## REML criterion at convergence: 33292.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6069 -0.7125 -0.0104 0.7277 3.1666
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantName (Intercept) 0.30236 0.5499
## TargetAnimal 0.02719 0.1649 1.00
## TargetArtefact 0.98719 0.9936 -0.59 -0.59
## ot1 19.12296 4.3730 0.35 0.35 0.06
## ot2 21.61993 4.6497 -0.13 -0.13 0.27 0.02
## ot3 17.93585 4.2351 -0.10 -0.10 0.17 -0.57
## Residual 1.67951 1.2960
##
##
##
##
##
##
## 0.05
##
## Number of obs: 9770, groups: ParticipantName, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.93108 0.10302 9.038
## TargetAnimal 0.55179 0.04426 12.466
## TargetArtefact -1.33743 0.18429 -7.257
## ot1 -4.17221 0.84469 -4.939
## ot2 0.36452 0.89093 0.409
## ot3 1.34804 0.81821 1.648
## TargetAnimal:ot1 -1.76470 0.38726 -4.557
## TargetArtefact:ot1 1.35106 0.38791 3.483
## TargetAnimal:ot2 0.10104 0.37999 0.266
## TargetArtefact:ot2 3.01015 0.37988 7.924
## TargetAnimal:ot3 0.38625 0.37572 1.028
## TargetArtefact:ot3 -7.13339 0.37536 -19.004
##
## Correlation of Fixed Effects:
## (Intr) TrgtAn TrgtAr ot1 ot2 ot3 TrgtAn:1 TrgtAr:1
## TargetAniml 0.548
## TargtArtfct -0.598 -0.333
## ot1 0.335 0.207 0.046
## ot2 -0.113 -0.090 0.250 0.028
## ot3 -0.091 -0.067 0.153 -0.506 0.054
## TrgtAnml:t1 -0.019 0.089 0.011 -0.229 -0.024 -0.006
## TrgtArtfc:1 -0.021 0.044 0.022 -0.232 -0.025 -0.006 0.499
## TrgtAnml:t2 -0.008 0.039 0.005 -0.025 -0.213 -0.020 0.111 0.055
## TrgtArtfc:2 -0.009 0.019 0.010 -0.027 -0.216 -0.022 0.055 0.113
## TrgtAnml:t3 -0.003 0.012 0.002 -0.006 -0.019 -0.229 0.025 0.013
## TrgtArtfc:3 -0.002 0.006 0.003 -0.006 -0.021 -0.233 0.012 0.026
## TrgtAn:2 TrgtAr:2 TrgtAn:3
## TargetAniml
## TargtArtfct
## ot1
## ot2
## ot3
## TrgtAnml:t1
## TrgtArtfc:1
## TrgtAnml:t2
## TrgtArtfc:2 0.500
## TrgtAnml:t3 0.088 0.044
## TrgtArtfc:3 0.044 0.087 0.500
Clean up our workspace.
ls()
## [1] "binned" "binned_3levels"
## [3] "binned2" "curWarnings"
## [5] "data" "model"
## [7] "model_cubic" "model_linear"
## [9] "model_null" "model_quartic"
## [11] "model_quintic" "natural_model"
## [13] "new_condition" "orthogonal_polynomials"
## [15] "time_codes"
rm(list=ls())