#################################################################################### #This is a file with executable R code for chapter 7 of Natalia Levshina's (2015) #How to Do Linguistics with R. Amsterdam/Philadelphia: John Benjamins. #################################################################################### ###Section 7.2 ##Main text install.packages(c("leaps", "car", "rms", "visreg", "boot")) library(Rling); library(leaps); library(car); library(rms); library(visreg); library(boot) data(ELP) str(ELP) m <- lm(Mean_RT ~ Length + log(SUBTLWF) + POS, data = ELP) summary(m) fitted(m)[1] confint(m) m0 <- lm(Mean_RT ~ 1, data = ELP) m.fw <- step(m0, direction = "forward", scope = ~ Length + log(SUBTLWF) + POS) m.bw <- step(m, direction = "backward") m.bw m.both <- step(m0, scope = ~ Length + log(SUBTLWF) + POS) m.both subsets <- regsubsets(Mean_RT ~ Length + log(SUBTLWF) + POS, data = ELP, nbest = 4) round(summary(subsets)$rsq, 3) round(summary(subsets)$adjr2, 3) summary(subsets)$which drop1(m, test = "F") influencePlot(m, id.method = "identify") ELP[331, ] m1 <- lm(Mean_RT ~ Length + log(SUBTLWF) + POS, data = ELP[-331,]) summary(m1) par(mfrow = c(1, 2)) crPlot(m, var = "Length") crPlot(m, var = "log(SUBTLWF)") par(mfrow = c(1, 1)) plot(m, which = 1) ncvTest(m) ncvTest(m, ~ Length) boxCox(m) boxCox(m, lambda = seq(-3, 3, 1/10)) m.trans <- lm(Mean_RT^-1.3 ~ Length + log(SUBTLWF) + POS, data = ELP) ncvTest(m.trans) car::vif(m.trans) Length1 <- ELP$Length Length1[1:3] <- rep(8, 3) head(ELP$Length) head(Length1) m.test <- lm(Mean_RT^-1.3 ~ Length + log(SUBTLWF) + POS + Length1, data = ELP) car::vif(m.test) summary(m.test) durbinWatsonTest(m.trans) shapiro.test(residuals(m.trans)) m.int <- lm(Mean_RT^-1.3 ~ Length*POS + log(SUBTLWF), data = ELP) anova(m.trans, m.int) visreg(m.int, xvar = "Length", by = "POS") summary(m.int) m.val <- ols(Mean_RT^-1.3 ~ Length*POS + log(SUBTLWF), data = ELP, x = TRUE, y = TRUE) validate(m.val, bw = TRUE, B = 200) bootcoef <- function(formula, data, indices) { d <- data[indices,] model <- lm(formula, data=d) return(coef(model)) } m.boot <- boot(formula = Mean_RT ~ Length + log(SUBTLWF) + POS, data = ELP, statistic = bootcoef, R = 2000) boot.ci(m.boot, type = "bca", index = 1) boot.ci(m.boot, type = "bca", index = 2) bootR2 <- function(formula, data, indices) { d <- data[indices,] model <- lm(formula, data=d) return(summary(model)$r.squared) } m.boot.R2 <- boot(formula = Mean_RT ~ Length + log(SUBTLWF) + POS, data = ELP, statistic = bootR2, R = 2000) boot.ci(m.boot.R2, type = "bca")