### K-Means Clustering
## Standardize variables with scale()
x <- matrix(1:10, ncol = 2)
# Center every column on its mean and rescale it to unit standard deviation.
centered.scaled.x <- scale(x)
# Both columns of x are perfectly correlated, so every entry of this
# covariance matrix is 1.
cov(centered.scaled.x)
# Center only: subtract the column means but leave the spread untouched.
centered.x <- scale(x, center = TRUE, scale = FALSE)
centered.x
# A two-dimensional K-means clustering example: two Gaussian clouds of
# 50 points each, centred at (0, 0) and (1, 1).
x <- rbind(
  matrix(rnorm(100, sd = 0.3), ncol = 2),
  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
colnames(x) <- c("x", "y")
k.cl <- kmeans(x, centers = 2) # K-means clustering

# Plot the clustered (bivariate) data, one colour per cluster:
# first use plot(, type = "n") to set up the axes for the data range,
# then use text() to draw every observation as its label.
plot(x, type = "n")
# x has no row names, so row.names(x) is NULL and text() would stop with
# "zero-length 'labels' specified"; fall back to the row index in that case.
point.labels <- rownames(x)
if (is.null(point.labels)) {
  point.labels <- seq_len(nrow(x))
}
text(x, col = k.cl$cluster, labels = point.labels)
points(k.cl$centers, col = 1:2, pch = 8, cex = 2) # mark the cluster centres

## Get cluster means: the per-cluster column means computed by hand ...
aggregate(x, by = list(k.cl$cluster), FUN = mean)
# ... and the centres reported by kmeans() itself, for comparison.
k.cl$centers
### Determine number of clusters
## Within groups sum of squares (SSW)
#' Within-groups sum of squares for a range of cluster counts
#'
#' Fits `kmeans()` with 1, 2, ..., `max.clusters` clusters, draws a scree plot
#' of the within-groups sum of squares against the number of clusters, and
#' returns the plotted values.
#'
#' @param data Numeric matrix or data frame with one observation per row and
#'   at least two rows.
#' @param max.clusters Largest number of clusters to try. Defaults to
#'   `nrow(data) - 1`; must lie between 1 and `nrow(data) - 1`.
#' @param ... Further arguments passed on to `kmeans()` (for example
#'   `nstart` or `iter.max`).
#' @return A data frame with columns `No.of.clusters` and `SSW`.
SSW <- function(data, max.clusters = nrow(data) - 1, ...) {
  n.obs <- nrow(data)
  stopifnot(
    !is.null(n.obs),
    n.obs >= 2,
    is.numeric(max.clusters),
    length(max.clusters) == 1,
    max.clusters >= 1,
    max.clusters <= n.obs - 1
  )
  n <- as.integer(max.clusters)
  ssw <- numeric(n)
  # With a single cluster the within-groups SS is the total SS around the
  # column means, i.e. (n - 1) times the sum of the column variances.
  ssw[1] <- (n.obs - 1) * sum(apply(data, 2, var))
  # seq_len(n)[-1] is empty when n == 1, whereas 2:n would count down (2, 1)
  # and ask kmeans() for as many centres as there are rows.
  for (i in seq_len(n)[-1]) {
    ssw[i] <- sum(kmeans(data, centers = i, ...)$withinss)
  }
  plot(seq_len(n), ssw, type = "b", xlab = "Number of Clusters",
       ylab = "Within groups sum of squares")
  data.frame(No.of.clusters = seq_len(n), SSW = ssw)
}
## R square scree plot
#' R-square scree plot for K-means clustering
#'
#' Fits `kmeans()` with 1, 2, ..., `max.clusters` clusters, converts each
#' within-groups sum of squares into R-square = 1 - SSW / total SS, plots
#' R-square against the number of clusters (with a dashed reference line at
#' 1), and returns the plotted values.
#'
#' @param data Numeric matrix or data frame with one observation per row and
#'   at least two rows.
#' @param max.clusters Largest number of clusters to try. Defaults to
#'   `nrow(data) - 1`; must lie between 1 and `nrow(data) - 1`.
#' @param ... Further arguments passed on to `kmeans()` (for example
#'   `nstart` or `iter.max`).
#' @return A data frame with columns `No.of.clusters` and `R.square`.
R.square.km <- function(data, max.clusters = nrow(data) - 1, ...) {
  n.obs <- nrow(data)
  stopifnot(
    !is.null(n.obs),
    n.obs >= 2,
    is.numeric(max.clusters),
    length(max.clusters) == 1,
    max.clusters >= 1,
    max.clusters <= n.obs - 1
  )
  n <- as.integer(max.clusters)
  ssw <- numeric(n)
  # With a single cluster the within-groups SS is the total SS around the
  # column means, i.e. (n - 1) times the sum of the column variances.
  ssw[1] <- (n.obs - 1) * sum(apply(data, 2, var))
  # seq_len(n)[-1] is empty when n == 1, whereas 2:n would count down (2, 1)
  # and ask kmeans() for as many centres as there are rows.
  for (i in seq_len(n)[-1]) {
    ssw[i] <- sum(kmeans(data, centers = i, ...)$withinss)
  }
  # Total sum of squares of the column-centred data.
  total.ss <- sum(scale(data, scale = FALSE)^2)
  R.square <- 1 - (ssw / total.ss)
  plot(seq_len(n), R.square, type = "b", xlab = "Number of Clusters",
       ylab = "R-square")
  abline(h = 1, col = 2, lty = "dashed")
  data.frame(No.of.clusters = seq_len(n), R.square = R.square)
}
# (Blog page footer, not part of the script.)
# No comments: / Post a comment