Related Posts Plugin for WordPress, Blogger...

2013年9月22日 星期日

Assinment#5

## 1. textbook 12.8
library('lavaan')
lower <- "
0,
9, 0,
3, 7, 0,
6, 5, 9, 0,
11, 10, 2, 8, 0,"

D = as.dist(getCov(lower, names=c("1","2","3","4","5")))
hc.avg <- hclust(D, method="average")
hc.s <- hclust(D, method="single")
par(mfrow=c(1,2))
plot(hc.avg, hang=-1, main="Cluster Dendrogram (Average linkage)")
plot(hc.s, hang=-1, main="Cluster Dendrogram (Single linkage)")

## 2. textbook 12.9
library('lavaan')
lower <- "
0,
.76, 0,
2.97, .8, 0,
4.88, 4.17, .21, 0,
3.86, 1.92, 1.51, .51, 0,"

D = as.dist(getCov(lower, names=c("1","2","3","4","5")))
par(mfrow=c(2,2))
plot(hc.s <-hclust(D, method="single"),main="Single linkage") # single linkage
plot(hc.c <-hclust(D, method="complete"),main="Complete linkage") # complete linkage
plot(hc.avg <-hclust(D, method="average"),main="Average linkage") # average linkage

## 3.
x <- data.frame(X1=c(5,-1,1,-3),X2=c(3,1,-2,-2),row.names=c(LETTERS[1:4]))
k.cl <- kmeans(x, 2)
k.cl$cluster


## 4.
FOODP <- read.table("FOODP.DAT", skip=2, header = TRUE)
prin.cr <- prcomp( ~Bread+Hamburger+Butter+Apples+Tomatoes, data=FOODP, scale=TRUE)
pc.score <- prin.cr$x[,1:2]  # get pc scores of the first 2 dim.
pc.score <- -pc.score  # 調整正負號以便和價格高低相符
hc <- hclust(dist(pc.score),method="average")
groups <- cutree(hc,k=1:10)

# (a)
#---------------------------------------------------------
R.square <- function(x, method="average", k){
  ### R-square for hierarchical clustering given number of cutting groups
  ### x     : original data (col=variables, row=observations)
  ### method: hierarchical clustering linkage method
  ### k     : number of cutting groups

  hc <- hclust(dist(x), method=method)
  group <- as.matrix(cutree(hc,k)) # get the group vectors

  ## sum of squares for all the elements
  ss <- function(x) sum(scale(x, scale = FALSE)^2)

  ## calculate BetweenSS, totalWithinSS, and R-squares
  betweenss <- NULL     # reserve space
  tot.withinss <- NULL
  r.square <- NULL

  for(i in 1:length(k)){
    ## calculate cluster center "fitted" to each obs.:
    fitted.x <- NULL  # reserve space
    for(n in 1:nrow(x)){
      g <- group[n,i]  # get the group for each obs.
      fitted.x <- rbind(fitted.x, colMeans(x[group[,i]==g, , drop = FALSE]))
    }
    resid.x <- x - fitted.x

  betweenss <- rbind(betweenss, ss(fitted.x))
  tot.withinss <- rbind(tot.withinss, ss(resid.x))
  r.square <- rbind(r.square, ss(fitted.x)/ss(x))
  }

  return(data.frame(K.groups = k,
                    betweenss = betweenss, 
                    tot.withinss = tot.withinss,
                    totss = rep(ss(x),length(k)),
                    r.square = r.square))
}
#-------------------------------------------------------

RR <- data.frame(
Single = R.square(pc.score, "single", k=1:10)$r.square,
Complete = R.square(pc.score, "complete", k=1:10)$r.square,
Average = R.square(pc.score, "average", k=1:10)$r.square,
Ward = R.square(pc.score, "ward", k=1:10)$r.square)
RR

R2 <- read.table("R-squares.txt", header = TRUE)

plot(1:10,RR[,1], pch=15, type="b", xaxp=c(0,nrow(R2),nrow(R2)),
   xlab="Number of clusters", ylab="R-squares",panel.first = grid())
points(1:10,RR[,2], pch=16, col=2, type="b")
points(1:10,RR[,3], pch=17, col=3, type="b")
points(1:10,RR[,4], pch=18, col=4, type="b")
legend(x="bottomright",legend=names(RR),
   lty=rep(1,4), col=c(1:4), pch=c(15:18),bg="white")


# (b)
cl.center <- function(x, method, k){
  hc <- hclust(dist(x), method=method)
  memb <- cutree(hc, k) # get the group vectors
  cent <- NULL  # reserve space
  for(j in 1:k){
    cent <- rbind(cent, colMeans(x[memb == j, , drop = FALSE]))
  }
  return(cent)
}

centroid <- cl.center(pc.score,"single",k=4)
k.cl <- kmeans(pc.score, centers=centroid)
k.cl$cluster
table(k.cl$cluster)  # 各群的人數
## 微調 k.cl$cluster[16] <- 4

# 先利用plot( , type="n" )將資料的範圍先畫出來,
# 再利用text( )把每一個資料的名稱都點出
plot(pc.score, type="n")
text(pc.score, col=k.cl$cluster, labels=row.names(pc.score))
abline(h=0, v=0, lty="dashed")

沒有留言:

張貼留言