2012年8月16日 星期四

R-square for hierarchical clustering

R.square <- function(x, method="average", k){
### R-square for hierarchical clustering given number of cutting groups
### x : original data (col=variables, row=observations)
### method: hierarchical clustering linkage method
### k : number of cutting groups

hc <- hclust(dist(x), method=method)
group <- as.matrix(cutree(hc,k)) # get the group vectors

## sum of squares for all the elements
ss <- function(x) sum(scale(x, scale = FALSE)^2)

## calculate BetweenSS, totalWithinSS, and R-squares
betweenss <- NULL # reserve space
tot.withinss <- NULL
r.square <- NULL

for(i in 1:length(k)){
## calculate cluster center "fitted" to each obs.:
fitted.x <- NULL # reserve space
for(n in 1:nrow(x)){
g <- group[n,i] # get the group for each obs.
fitted.x <- rbind(fitted.x, colMeans(x[group[,i]==g, , drop = FALSE]))
}
resid.x <- x - fitted.x

betweenss <- rbind(betweenss, ss(fitted.x))
tot.withinss <- rbind(tot.withinss, ss(resid.x))
r.square <- rbind(r.square, ss(fitted.x)/ss(x))
}

return(data.frame(K.groups = k,
betweenss = betweenss,
tot.withinss = tot.withinss,
totss = rep(ss(x),length(k)),
r.square = r.square))
}