#################################################################################### #This is a file with executable R code for chapter 15 of Natalia Levshina's (2015) #How to Do Linguistics with R. Amsterdam/Philadelphia: John Benjamins. #################################################################################### ###Section 15.2 ##Main text install.packages(c("cluster", "pvclust", "vcd")) library(Rling); library(cluster); library(pvclust); library(vcd) data(caus) str(caus) levels(caus$Cx) be_made_toV <- caus[caus$Cx == "be_made_toV", -1] cause_toV <- caus[caus$Cx == "cause_toV", -1] get_toV <- caus[caus$Cx == "get_toV", -1] get_Ved <- caus[caus$Cx == "get_Ved", -1] get_Ving <- caus[caus$Cx == "get_Ving", -1] have_V <- caus[caus$Cx == "have_V", -1] have_Ved <- caus[caus$Cx == "have_Ved", -1] have_Ving <- caus[caus$Cx == "have_Ving", -1] make_V <- caus[caus$Cx == "make_V", -1] prop.table(table(make_V$CrSem)) prop.table(table(make_V$CeSem)) make_V.bp <- bp(make_V) make_V.bp be_made_toV.bp <- bp(be_made_toV) cause_toV.bp <- bp(cause_toV) get_toV.bp <- bp(get_toV) get_Ved.bp <- bp(get_Ved) get_Ving.bp <- bp(get_Ving) have_V.bp <- bp(have_V) have_Ved.bp <- bp(have_Ved) have_Ving.bp <- bp(have_Ving) caus.bp <- rbind(be_made_toV.bp, cause_toV.bp, get_toV.bp, get_Ved.bp, get_Ving.bp, have_V.bp, have_Ved.bp, have_Ving.bp, make_V.bp) dim(caus.bp) rownames(caus.bp) <- levels(caus$Cx) caus.dist <- dist(caus.bp, method = "canberra") round(caus.dist, 2) max(caus.dist) min(caus.dist) caus.hc <- hclust(caus.dist, method = "ward.D2") plot(caus.hc, hang = -1) test.clust <- cutree(caus.hc, k = 2) test.clust summary(silhouette(test.clust, caus.dist))$avg.width asw <- sapply(2:8, function(x) summary(silhouette(cutree(caus.hc, k = x), caus.dist))$avg.width) asw rect.hclust(caus.hc, k = 4) c1 <- caus.bp[c(4,7),] c2 <- caus.bp[-c(4,7),] c1.bp <- colMeans(c1) c2.bp <- colMeans(c2) diff <- c1.bp - c2.bp sort(diff, decreasing = TRUE) plot(sort(diff)*1.2, 1:length(diff), type = "n", xlab = "cluster 2 <–-> cluster 1", yaxt = "n", ylab = "") text(sort(diff), 1:length(diff), names(sort(diff))) cluster <- as.character(caus$Cx) cluster[cluster == "get_Ved"|cluster == "have_Ved"] = "1" cluster[cluster != "1"] = "2" cluster <- as.factor(cluster) assocstats(table(cluster, caus$CrSem)) caus.pvc <- pvclust(t(caus.bp), method.hclust = "ward.D2", method.dist = "canberra") plot(caus.pvc, hang = -1) pvrect(caus.pvc, alpha = 0.95) test.clust <- kmeans(caus.bp, 4, nstart = 25) test.clust$cluster test.clust$tot.withinss wcss <- sapply(1:8, function(x) kmeans(caus.bp, x, nstart = 25)$tot.withinss) wcss plot(1:8, wcss, type = "b", main = "Scree plot of WCSS for n clusters", xlab = "n of clusters", ylab = "WCSS") caus.pam <- pam(caus.dist, k = 4) caus.pam$clustering caus.pam$medoids caus.pam$silinfo$avg.width asw <- sapply(2:8, function(x) pam(caus.dist, k = x)$silinfo$avg.width) asw ##Boxes with additional information caus.split <- split(caus, caus$Cx) str(caus.split) length(caus.split) caus.split <- lapply(caus.split, function(x) x = x[, -1]) caus.split.bp <- lapply(caus.split, bp) caus.split.bp[1] caus.bp1 <- do.call(rbind, caus.split.bp) identical(caus.bp, caus.bp1) adist("water", "wine")