## 1. textbook 12.8
library('lavaan')
lower <- "
0,
9, 0,
3, 7, 0,
6, 5, 9, 0,
11, 10, 2, 8, 0,"
D = as.dist(getCov(lower, names=c("1","2","3","4","5")))
hc.avg <- hclust(D, method="average")
hc.s <- hclust(D, method="single")
par(mfrow=c(1,2))
plot(hc.avg, hang=-1, main="Cluster Dendrogram (Average linkage)")
plot(hc.s, hang=-1, main="Cluster Dendrogram (Single linkage)")
## 2. textbook 12.9
library('lavaan')
lower <- "
0,
.76, 0,
2.97, .8, 0,
4.88, 4.17, .21, 0,
3.86, 1.92, 1.51, .51, 0,"
D = as.dist(getCov(lower, names=c("1","2","3","4","5")))
par(mfrow=c(2,2))
plot(hc.s <-hclust(D, method="single"),main="Single linkage") # single linkage
plot(hc.c <-hclust(D, method="complete"),main="Complete linkage") # complete linkage
plot(hc.avg <-hclust(D, method="average"),main="Average linkage") # average linkage
## 3.
x <- data.frame(X1=c(5,-1,1,-3),X2=c(3,1,-2,-2),row.names=c(LETTERS[1:4]))
k.cl <- kmeans(x, 2)
k.cl$cluster
## 4.
FOODP <- read.table("FOODP.DAT", skip=2, header = TRUE)
prin.cr <- prcomp( ~Bread+Hamburger+Butter+Apples+Tomatoes, data=FOODP, scale=TRUE)
pc.score <- prin.cr$x[,1:2] # get pc scores of the first 2 dim.
pc.score <- -pc.score # 調整正負號以便和價格高低相符
hc <- hclust(dist(pc.score),method="average")
groups <- cutree(hc,k=1:10)
# (a)
#---------------------------------------------------------
R.square <- function(x, method="average", k){
### R-square for hierarchical clustering given number of cutting groups
### x : original data (col=variables, row=observations)
### method: hierarchical clustering linkage method
### k : number of cutting groups
hc <- hclust(dist(x), method=method)
group <- as.matrix(cutree(hc,k)) # get the group vectors
## sum of squares for all the elements
ss <- function(x) sum(scale(x, scale = FALSE)^2)
## calculate BetweenSS, totalWithinSS, and R-squares
betweenss <- NULL # reserve space
tot.withinss <- NULL
r.square <- NULL
for(i in 1:length(k)){
## calculate cluster center "fitted" to each obs.:
fitted.x <- NULL # reserve space
for(n in 1:nrow(x)){
g <- group[n,i] # get the group for each obs.
fitted.x <- rbind(fitted.x, colMeans(x[group[,i]==g, , drop = FALSE]))
}
resid.x <- x - fitted.x
betweenss <- rbind(betweenss, ss(fitted.x))
tot.withinss <- rbind(tot.withinss, ss(resid.x))
r.square <- rbind(r.square, ss(fitted.x)/ss(x))
}
return(data.frame(K.groups = k,
betweenss = betweenss,
tot.withinss = tot.withinss,
totss = rep(ss(x),length(k)),
r.square = r.square))
}
#-------------------------------------------------------
RR <- data.frame(
Single = R.square(pc.score, "single", k=1:10)$r.square,
Complete = R.square(pc.score, "complete", k=1:10)$r.square,
Average = R.square(pc.score, "average", k=1:10)$r.square,
Ward = R.square(pc.score, "ward", k=1:10)$r.square)
RR
R2 <- read.table("R-squares.txt", header = TRUE)
plot(1:10,RR[,1], pch=15, type="b", xaxp=c(0,nrow(R2),nrow(R2)),
xlab="Number of clusters", ylab="R-squares",panel.first = grid())
points(1:10,RR[,2], pch=16, col=2, type="b")
points(1:10,RR[,3], pch=17, col=3, type="b")
points(1:10,RR[,4], pch=18, col=4, type="b")
legend(x="bottomright",legend=names(RR),
lty=rep(1,4), col=c(1:4), pch=c(15:18),bg="white")
# (b)
cl.center <- function(x, method, k){
hc <- hclust(dist(x), method=method)
memb <- cutree(hc, k) # get the group vectors
cent <- NULL # reserve space
for(j in 1:k){
cent <- rbind(cent, colMeans(x[memb == j, , drop = FALSE]))
}
return(cent)
}
centroid <- cl.center(pc.score,"single",k=4)
k.cl <- kmeans(pc.score, centers=centroid)
k.cl$cluster
table(k.cl$cluster) # 各群的人數
## 微調 k.cl$cluster[16] <- 4
# 先利用plot( , type="n" )將資料的範圍先畫出來,
# 再利用text( )把每一個資料的名稱都點出
plot(pc.score, type="n")
text(pc.score, col=k.cl$cluster, labels=row.names(pc.score))
abline(h=0, v=0, lty="dashed")