R.square <- function(x, method="average", k){
### R-square for hierarchical clustering given number of cutting groups
### x : original data (col=variables, row=observations)
### method: hierarchical clustering linkage method
### k : number of cutting groups
hc <- hclust(dist(x), method=method)
group <- as.matrix(cutree(hc,k)) # get the group vectors
## sum of squares for all the elements
ss <- function(x) sum(scale(x, scale = FALSE)^2)
## calculate BetweenSS, totalWithinSS, and R-squares
betweenss <- NULL # reserve space
tot.withinss <- NULL
r.square <- NULL
for(i in 1:length(k)){
## calculate cluster center "fitted" to each obs.:
fitted.x <- NULL # reserve space
for(n in 1:nrow(x)){
g <- group[n,i] # get the group for each obs.
fitted.x <- rbind(fitted.x, colMeans(x[group[,i]==g, , drop = FALSE]))
}
resid.x <- x - fitted.x
betweenss <- rbind(betweenss, ss(fitted.x))
tot.withinss <- rbind(tot.withinss, ss(resid.x))
r.square <- rbind(r.square, ss(fitted.x)/ss(x))
}
return(data.frame(K.groups = k,
betweenss = betweenss,
tot.withinss = tot.withinss,
totss = rep(ss(x),length(k)),
r.square = r.square))
}
2012年8月16日 星期四
R-square for hierarchical clustering
2012年6月1日 星期五
Clustering: Hierarchical Clustering
### Hierarchical Clustering hc <- hclust(dist(USArrests), method="average", members=NULL) # Cluster linkage methods: # "ward" # "single" # "complete" # "average" # "mcquitty" # "median" # "centroid" summary(hc) ## plot Cluster Dendrogram plot(hc, hang=-1, labels=abbreviate(row.names(USArrests),2), cex=0.8) # ?plclust() rect.hclust(hc, k=5, border="red") # 在圖中框出各群 # Cutting the dendrogram to form k=5 clusters here: groups <- cutree(hc, k=5) # cut tree into 5 clusters
訂閱:
文章 (Atom)