## 1. textbook 12.8 library('lavaan') lower <- " 0, 9, 0, 3, 7, 0, 6, 5, 9, 0, 11, 10, 2, 8, 0,"
D = as.dist(getCov(lower, names=c("1","2","3","4","5"))) hc.avg <- hclust(D, method="average") hc.s <- hclust(D, method="single") par(mfrow=c(1,2)) plot(hc.avg, hang=-1, main="Cluster Dendrogram (Average linkage)") plot(hc.s, hang=-1, main="Cluster Dendrogram (Single linkage)")
## 2. textbook 12.9 library('lavaan') lower <- " 0, .76, 0, 2.97, .8, 0, 4.88, 4.17, .21, 0, 3.86, 1.92, 1.51, .51, 0,"
D = as.dist(getCov(lower, names=c("1","2","3","4","5"))) par(mfrow=c(2,2)) plot(hc.s <-hclust(D, method="single"),main="Single linkage") # single linkage plot(hc.c <-hclust(D, method="complete"),main="Complete linkage") # complete linkage plot(hc.avg <-hclust(D, method="average"),main="Average linkage") # average linkage
## 3. x <- data.frame(X1=c(5,-1,1,-3),X2=c(3,1,-2,-2),row.names=c(LETTERS[1:4])) k.cl <- kmeans(x, 2) k.cl$cluster
## 4. FOODP <- read.table("FOODP.DAT", skip=2, header = TRUE) prin.cr <- prcomp( ~Bread+Hamburger+Butter+Apples+Tomatoes, data=FOODP, scale=TRUE) pc.score <- prin.cr$x[,1:2] # get pc scores of the first 2 dim. pc.score <- -pc.score # 調整正負號以便和價格高低相符 hc <- hclust(dist(pc.score),method="average") groups <- cutree(hc,k=1:10)
# (a) #--------------------------------------------------------- R.square <- function(x, method="average", k){ ### R-square for hierarchical clustering given number of cutting groups ### x : original data (col=variables, row=observations) ### method: hierarchical clustering linkage method ### k : number of cutting groups hc <- hclust(dist(x), method=method) group <- as.matrix(cutree(hc,k)) # get the group vectors ## sum of squares for all the elements ss <- function(x) sum(scale(x, scale = FALSE)^2) ## calculate BetweenSS, totalWithinSS, and R-squares betweenss <- NULL # reserve space tot.withinss <- NULL r.square <- NULL for(i in 1:length(k)){ ## calculate cluster center "fitted" to each obs.: fitted.x <- NULL # reserve space for(n in 1:nrow(x)){ g <- group[n,i] # get the group for each obs. fitted.x <- rbind(fitted.x, colMeans(x[group[,i]==g, , drop = FALSE])) } resid.x <- x - fitted.x betweenss <- rbind(betweenss, ss(fitted.x)) tot.withinss <- rbind(tot.withinss, ss(resid.x)) r.square <- rbind(r.square, ss(fitted.x)/ss(x)) } return(data.frame(K.groups = k, betweenss = betweenss, tot.withinss = tot.withinss, totss = rep(ss(x),length(k)), r.square = r.square)) } #-------------------------------------------------------
RR <- data.frame( Single = R.square(pc.score, "single", k=1:10)$r.square, Complete = R.square(pc.score, "complete", k=1:10)$r.square, Average = R.square(pc.score, "average", k=1:10)$r.square, Ward = R.square(pc.score, "ward", k=1:10)$r.square) RR
R2 <- read.table("R-squares.txt", header = TRUE) plot(1:10,RR[,1], pch=15, type="b", xaxp=c(0,nrow(R2),nrow(R2)), xlab="Number of clusters", ylab="R-squares",panel.first = grid()) points(1:10,RR[,2], pch=16, col=2, type="b") points(1:10,RR[,3], pch=17, col=3, type="b") points(1:10,RR[,4], pch=18, col=4, type="b") legend(x="bottomright",legend=names(RR), lty=rep(1,4), col=c(1:4), pch=c(15:18),bg="white")
# (b) cl.center <- function(x, method, k){ hc <- hclust(dist(x), method=method) memb <- cutree(hc, k) # get the group vectors cent <- NULL # reserve space for(j in 1:k){ cent <- rbind(cent, colMeans(x[memb == j, , drop = FALSE])) } return(cent) }
centroid <- cl.center(pc.score,"single",k=4) k.cl <- kmeans(pc.score, centers=centroid) k.cl$cluster table(k.cl$cluster) # 各群的人數 ## 微調 k.cl$cluster[16] <- 4
# 先利用plot( , type="n" )將資料的範圍先畫出來, # 再利用text( )把每一個資料的名稱都點出 plot(pc.score, type="n") text(pc.score, col=k.cl$cluster, labels=row.names(pc.score)) abline(h=0, v=0, lty="dashed")