####################################################################################
#This is a file with executable R code for chapter 4 of Natalia Levshina's (2015)
#How to Do Linguistics with R. Amsterdam/Philadelphia: John Benjamins.
####################################################################################

# NOTE: this script is written to be run interactively, statement by statement.
# Bare expressions (e.g. `sent.t`) display their value at the console; they do
# not print when the file is run with a plain source().

###Section 4.1
##Main text

# Install ggplot2 only if it is not installed yet.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(Rling)
library(ggplot2)

# Load the `sent` data set and inspect its structure.
data(sent)
str(sent)

# Frequencies of the categories in sent$clause.
summary(sent$clause)
table(sent$clause)
sent.t <- table(sent$clause)
sent.t

# Proportions (two equivalent ways) and percentages.
sent.t / sum(sent.t)
prop.table(sent.t)
prop.table(sent.t) * 100

##Boxes with additional information

# The mean of a logical vector is the proportion of TRUE values.
mean(sent$clause == "Intrans")
sent$clause == "Intrans"
as.numeric(sent$clause == "Intrans")

10 / 8
8 / 10

###Section 4.2
##Main text

# Pie chart: default version first, then with percentage labels and a legend.
pie(sent.t)
sent_labels <- prop.table(sent.t) * 100
sent_labels
sent_labels <- paste0(sent_labels, "%")
sent_labels
sent_colours <- c("black", "grey40", "grey80")
pie(
  sent.t,
  main = "Pie chart of clause types",
  labels = sent_labels,
  col = sent_colours
)
legend(1, 0, legend = levels(sent$clause), fill = sent_colours)

# Bar plot of the same frequencies.
barplot(
  sent.t,
  main = "Bar plot of clause types",
  col = "grey50",
  cex.names = 1.2,
  xlab = "Clause type",
  ylab = "Frequency"
)

# Dot chart of the same frequencies.
dotchart(
  sent.t,
  main = "Dot chart of clause types",
  xlab = "Frequency",
  ylab = "Clause type",
  lcolor = "black",
  pch = 16,
  xlim = c(0, 12)
)

##Boxes with additional information

# ggplot2 pie chart: a single stacked bar drawn in polar coordinates.
ggplot(sent, aes(x = factor(""), fill = clause)) +
  geom_bar() +
  coord_polar(theta = "y") +
  scale_x_discrete("") +
  scale_fill_manual(values = c("black", "grey40", "grey80"))

# ggplot2 bar plot.
ggplot(sent, aes(x = clause)) +
  geom_bar(fill = "white", colour = "black") +
  xlab("Clause type") +
  ylab("Frequency")

# Working with factor levels on a copy of the clause variable.
clause1 <- sent$clause
head(clause1)
clause1[1] <- "Intrans"
head(clause1)

# Kept on purpose: "Copula" is not yet among the levels of clause1 when this
# assignment runs; the next statements add the level and repeat the assignment.
clause1[2] <- "Copula"
head(clause1)

# Add "Copula" to the set of levels, then assign it again.
clause1 <- factor(clause1, levels = c(levels(clause1), "Copula"))
summary(clause1)
clause1[2] <- "Copula"
head(clause1)

# Merge "Intrans" and "Copula" into a new level "NonTrans".
clause1 <- factor(clause1, levels = c(levels(clause1), "NonTrans"))
clause1[clause1 %in% c("Intrans", "Copula")] <- "NonTrans"
summary(clause1)

# Re-creating the factor drops the levels that are no longer used.
clause1 <- factor(clause1)
summary(clause1)
levels(clause1)

# Set the order of levels explicitly, then make "NonTrans" the first
# (reference) level.
clause1 <- factor(clause1, levels = c("Trans", "NonTrans", "Ditr"))
levels(clause1)
clause1 <- relevel(clause1, ref = "NonTrans")
levels(clause1)

# ggplot2 dot chart. stat = "count" is used because `clause` is discrete:
# since ggplot2 2.0.0, stat = "bin" only accepts a continuous x variable.
ggplot(sent, aes(x = clause)) +
  geom_point(stat = "count", size = 5) +
  xlab("Clause type") +
  ylab("Frequency") +
  coord_flip() +
  ylim(0, 12) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

###Section 4.3
##Main text

library(Rling)
data(colreg)
colreg

# Four reference frequencies, converted to expected proportions.
# NOTE(review): presumably these are the sizes of the four registers, given in
# the same order as the columns of colreg -- confirm against the data.
freqreg <- c(95385672, 90344134, 91044778, 187245672)
freqreg
exp_prop <- prop.table(freqreg)
exp_prop

# DP for row 1 of colreg: half the sum of absolute differences between the
# observed and expected proportions. The normalised version divides DP by
# (1 - the smallest expected proportion).
colreg[1, ]
black_obs <- prop.table(colreg[1, ])
black_obs
DP_black <- sum(abs(black_obs - exp_prop)) / 2
DP_black
DP_black_norm <- DP_black / (1 - min(exp_prop))
DP_black_norm

# The same measures for row 4 of colreg.
gray_obs <- prop.table(colreg[4, ])
gray_obs
DP_gray <- sum(abs(gray_obs - exp_prop)) / 2
DP_gray
DP_gray_norm <- DP_gray / (1 - min(exp_prop))
DP_gray_norm

# Split the rows of colreg into two groups: rows 1, 2, 5 and 9-11 versus all
# remaining rows.
primcol <- colreg[c(1, 2, 5, 9:11), ]
primcol
seccol <- colreg[-c(1, 2, 5, 9:11), ]
seccol

# Column totals of each group and the corresponding observed proportions.
primcol_sums <- colSums(primcol)
primcol_sums
seccol_sums <- colSums(seccol)
seccol_sums
primcol_obs <- prop.table(primcol_sums)
primcol_obs
seccol_obs <- prop.table(seccol_sums)
seccol_obs

# DP and normalised DP for each group.
DP_primcol <- sum(abs(primcol_obs - exp_prop)) / 2
DP_primcol
DP_seccol <- sum(abs(seccol_obs - exp_prop)) / 2
DP_seccol
DP_primcol_norm <- DP_primcol / (1 - min(exp_prop))
DP_primcol_norm
DP_seccol_norm <- DP_seccol / (1 - min(exp_prop))
DP_seccol_norm