################################################################################
# This is a file with executable R code for chapter 16 of Natalia Levshina's
# (2015) How to Do Linguistics with R. Amsterdam/Philadelphia: John Benjamins.
################################################################################

### Section 16.2

## Main text

# Install 'cluster' only when it is missing, so that re-running the script does
# not reinstall the package (and does not need network access) every time.
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
library(Rling)
library(cluster)

data(cooking)
head(cooking)

# Keep only the rows whose total across all columns is positive.
cooking <- cooking[rowSums(cooking) > 0, ]

# Expected values for the 'Bake' column:
# (column total * row totals) / grand total.
exp.bake <- sum(cooking$Bake) * rowSums(cooking) / sum(cooking)
exp.bake[1:5]

# Pointwise mutual information: log2 of observed over expected.
# A zero observed count yields -Inf here.
PMI.bake <- log2(cooking$Bake / exp.bake)
PMI.bake[1:5]

# Positive PMI: every negative value (including -Inf) is replaced by 0.
PPMI.bake <- ifelse(PMI.bake < 0, 0, PMI.bake)
PPMI.bake[1:5]

# The same steps for the whole table at once. Only the table of expected
# values is taken from chisq.test(); the test result itself is discarded.
cooking <- as.matrix(cooking)
cooking.exp <- chisq.test(cooking)$expected
cooking.PMI <- log2(cooking / cooking.exp)
cooking.PPMI <- ifelse(cooking.PMI < 0, 0, cooking.PMI)

# Cosine similarity computed by hand: dot product of two PPMI columns divided
# by the product of their Euclidean norms. First columns 1 and 10, then
# columns 2 and 10.
crossprod(cooking.PPMI[, 1], cooking.PPMI[, 10]) /
  sqrt(crossprod(cooking.PPMI[, 1]) * crossprod(cooking.PPMI[, 10]))
crossprod(cooking.PPMI[, 2], cooking.PPMI[, 10]) /
  sqrt(crossprod(cooking.PPMI[, 2]) * crossprod(cooking.PPMI[, 10]))

# Transpose so that the former columns become rows before calling cossim().
# NOTE(review): cossim() comes from Rling; presumably it returns the matrix of
# pairwise cosine similarities between rows -- confirm against the Rling docs.
cooking1 <- t(cooking.PPMI)
cooking.cos <- cossim(cooking1)
round(cooking.cos, 2)

# Turn similarities into distances by rescaling with a fixed constant.
# NOTE(review): 0.07958 is hard-coded and its origin is not visible in this
# file -- confirm where it comes from. Any similarity above 0.07958 produces a
# negative value here.
cooking.dist <- 1 - (cooking.cos / 0.07958)
cooking.dist <- as.dist(cooking.dist)

# Partitioning around medoids with 2 clusters; report the average silhouette
# width of that solution.
test.clust <- pam(cooking.dist, 2)
test.clust$silinfo$avg.width

# Refit with 6 clusters and show the cluster assignment of each item.
test.clust <- pam(cooking.dist, 6)
test.clust$clustering