################################################################################
# This is a file with executable R code for chapter 5 of Natalia Levshina's
# (2015) How to Do Linguistics with R. Amsterdam/Philadelphia: John Benjamins.
################################################################################

### Section 5.2 ----------------------------------------------------------------
## Main text

# Install the plotting packages only when they are missing, so that re-running
# the script does not reinstall them every time.
for (pkg in c("ggplot2", "gplots")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(Rling)
library(ggplot2)
library(gplots)

# Load the two data sets used throughout the chapter.
data(pym_high)
data(pym_low)

# First look at the data.
head(pym_high)
str(pym_high)
summary(pym_high$assoc)
summary(pym_low$assoc)

# Side-by-side box plots of the `assoc` scores in both groups.
boxplot(
  pym_high$assoc, pym_low$assoc,
  names = c("high", "low"),
  main = "Box plots of average numbers of associations",
  xlab = "Frequency group",
  ylab = "Average number of associations"
)

# Values flagged as outliers by the box plot statistics, then the rows of
# pym_low whose `assoc` value equals 3.
boxplot.stats(pym_low$assoc)$out
pym_low[pym_low$assoc == 3, ]

# One-tailed t-tests; the two calls state the same hypothesis with the
# arguments (and therefore the direction of the alternative) swapped.
t.test(pym_high$assoc, pym_low$assoc, alternative = "greater")
t.test(pym_low$assoc, pym_high$assoc, alternative = "less")

# Critical values for a two-sided 95% interval: normal and t distributions.
qnorm((1 - 0.95) / 2, lower.tail = FALSE)
qt((1 - 0.95) / 2, df = length(pym_high$assoc) - 1, lower.tail = FALSE)

# Standard error and 95% confidence interval (mean +/- 1.96 * SE), high group.
se.high <- sd(pym_high$assoc) / sqrt(length(pym_high$assoc))
se.high
ci.lower.high <- mean(pym_high$assoc) - 1.96 * se.high
ci.lower.high
ci.upper.high <- mean(pym_high$assoc) + 1.96 * se.high
ci.upper.high

# Standard error and 95% confidence interval (mean +/- 1.96 * SE), low group.
se.low <- sd(pym_low$assoc) / sqrt(length(pym_low$assoc))
se.low
ci.lower.low <- mean(pym_low$assoc) - 1.96 * se.low
ci.lower.low
ci.upper.low <- mean(pym_low$assoc) + 1.96 * se.low
ci.upper.low

# Collect the group means and interval bounds in the order (high, low).
means <- c(mean(pym_high$assoc), mean(pym_low$assoc))
means
ci.lower <- c(ci.lower.high, ci.lower.low)
ci.lower
ci.upper <- c(ci.upper.high, ci.upper.low)
ci.upper

# Bar plot of the means with confidence-interval whiskers (gplots::barplot2).
# `names.arg` is spelled out in full rather than relying on partial argument
# matching of `names`.
barplot2(
  means,
  plot.ci = TRUE, ci.l = ci.lower, ci.u = ci.upper,
  main = "Bar plot with 95% confidence intervals",
  xlab = "Frequency groups",
  ylab = "Average number of associations",
  names.arg = c("High", "Low")
)

## Boxes with additional information

# Long-format data frame: one row per word, with a group label.
# - The label counts are taken from the data instead of being hard-coded, so
#   the labels always line up with the concatenated `assoc` values.
# - `freq` is created as a factor explicitly: since R 4.0 data.frame() keeps
#   character columns as character, which would make levels() below return
#   NULL.
pym_assoc <- data.frame(
  assoc = c(pym_high$assoc, pym_low$assoc),
  freq = factor(
    rep(
      c("high", "low"),
      times = c(length(pym_high$assoc), length(pym_low$assoc))
    ),
    levels = c("high", "low")
  )
)
head(pym_assoc)

ggplot(pym_assoc, aes(x = freq, y = assoc)) +
  geom_boxplot() +
  xlab("Frequency group") +
  ylab("Average number of associations")

# Sorting a data frame: ascending, descending, and by two keys.
pym_low[order(pym_low$imag), ]
pym_low[order(-pym_low$imag), ]
pym_low[order(pym_low$syl, pym_low$let), ]

head(pym_assoc)
tail(pym_assoc)

# Formula interface of t.test(); the first factor level ("high") is the first
# group of the comparison.
t.test(pym_assoc$assoc ~ pym_assoc$freq, alternative = "greater")
levels(pym_assoc$freq)

# Summary table (mean and standard error per group) for the ggplot2 bar plot.
assoc.df <- data.frame(
  group = c("High", "Low"),
  mean = means,
  se = c(se.high, se.low)
)
assoc.df

ggplot(assoc.df, aes(x = group, y = mean)) +
  geom_bar(stat = "identity", fill = "lightblue", colour = "black") +
  xlab("Frequency group") +
  ylab("Average number of associations") +
  geom_errorbar(
    aes(ymin = mean - 1.96 * se, ymax = mean + 1.96 * se),
    width = 0.2
  )

### Section 5.3 ----------------------------------------------------------------
## Main text

# Install ggplot2 only if it is not available yet.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

library(Rling)
library(ggplot2)

data(pym_high)
data(pym_low)

summary(pym_high$conc)
summary(pym_low$conc)

# Visual normality checks for the `conc` scores of the high group.
qqnorm(pym_high$conc, main = "Q–Q plot of concreteness scores")
qqline(pym_high$conc)
plot(
  density(pym_high$conc),
  main = "Density plot of concreteness scores",
  xlab = "Concreteness"
)

# Jittered strip chart of both groups, with rugs on the bottom (high group)
# and top (low group) axes.
stripchart(
  list(pym_high$conc, pym_low$conc),
  main = "Distribution of concreteness scores",
  group.names = c("high", "low"),
  method = "jitter",
  xlim = c(1, 7)
)
rug(pym_high$conc, side = 1)
rug(pym_low$conc, side = 3)

# Column 4 of the rows selected by `conc` range. With drop = FALSE the result
# stays a one-column data frame; without it the column collapses to a vector.
pym_high[pym_high$conc > 6, 4, drop = FALSE]
pym_high[pym_high$conc > 6, 4]
pym_high[pym_high$conc > 2 & pym_high$conc < 4, 4, drop = FALSE]
pym_high[pym_high$conc > 4 & pym_high$conc < 6, 4, drop = FALSE]

# Formal normality tests, followed by the two-sample Wilcoxon test.
shapiro.test(pym_high$conc)
shapiro.test(pym_low$conc)
wilcox.test(pym_high$conc, pym_low$conc, correct = FALSE, conf.int = TRUE)

## Boxes with additional information

# Long-format data frame for ggplot2; labels are sized from the data.
pym_conc <- data.frame(
  conc = c(pym_high$conc, pym_low$conc),
  freq = factor(
    rep(
      c("high", "low"),
      times = c(length(pym_high$conc), length(pym_low$conc))
    ),
    levels = c("high", "low")
  )
)

# labs() only sets labels, so an `ylim` entry there does not restrict the
# axis. The 1-7 range is applied through coord_flip() instead, mirroring the
# xlim = c(1, 7) of the base-graphics strip chart above.
ggplot(pym_conc, aes(x = freq, y = conc)) +
  geom_point(position = position_jitter(width = 0.05), shape = 0) +
  coord_flip(ylim = c(1, 7)) +
  labs(x = "Frequency group", y = "Average concreteness score")

### Section 5.4 ----------------------------------------------------------------
## Main text

library(Rling)
data(pym_high)

# Simulate a second, paired set of scores by adding normally distributed
# differences to the observed ones. One difference is drawn per observation so
# the two vectors always have the same length. rnorm() is not seeded here, so
# the numbers differ from run to run; call set.seed() beforehand if
# reproducible output is needed.
# NOTE(review): the variable name `diff` is kept from the original script; it
# shares its name with base::diff().
diff <- rnorm(length(pym_high$assoc), mean = -1.35, sd = 1.27)
nn <- pym_high$assoc + diff
head(nn)
nn <- round(nn, 2)

# Normality of the differences, then the paired one-tailed t-test.
shapiro.test(diff)
t.test(pym_high$assoc, nn, alternative = "greater", paired = TRUE)