多變量分析 [R]：學習筆記: K-Means Clustering

2013年9月10日星期二

K-Means Clustering

### K-Means Clustering

## Standardize variables with scale()
x <- matrix(seq_len(10), ncol = 2)

# Center every column and rescale it to unit variance; the covariance
# matrix of the result has all entries equal to 1 for this x.
centered.scaled.x <- scale(x)
cov(centered.scaled.x)

# Only subtract the column means (no rescaling).
centered.x <- scale(x, center = TRUE, scale = FALSE)
centered.x

# A two-dimensional K-means clustering example:
# 50 points drawn around (0, 0) stacked on 50 points drawn around (1, 1).
x <- rbind(matrix(rnorm(100, sd = 0.3), ncol = 2),
           matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2))
colnames(x) <- c("x", "y")

k.cl <- kmeans(x, centers=2) # K-means clustering

# Plot the clustered (bivariate) data, one colour per cluster.
# First call plot(, type = "n") to set up the axes for the data range,
# then use text() to print each observation's label at its position.
plot(x, type="n")
# x is built from unnamed matrices, so row.names(x) is NULL and text()
# rejects zero-length labels; fall back to the row index in that case.
text(x, col=k.cl$cluster,
     labels=if (is.null(row.names(x))) seq_len(nrow(x)) else row.names(x))
points(k.cl$centers, col = 1:2, pch = 8, cex=2) # mark each cluster centre

## Get cluster means (both lines report the same per-cluster averages)
aggregate(x,by=list(k.cl$cluster),FUN=mean)
k.cl$centers

### Determine number of clusters
## Within groups sum of squares (SSW)
# SSW(data): within-groups sum of squares for k = 1, ..., nrow(data) - 1.
#   data   : numeric matrix or data frame, one observation per row
#            (at least two rows).
#   returns: data.frame with columns No.of.clusters and SSW.
#   Side effect: draws the SSW-versus-k curve on the current device.
SSW <- function(data){
   n.obs <- nrow(data)
   if (is.null(n.obs) || n.obs < 2)
      stop("'data' must have at least two rows", call. = FALSE)
   n <- n.obs - 1
   # Preallocate instead of growing the vector inside the loop.
   ssw <- numeric(n)
   # k = 1: the within-group SS is the total SS around the column means.
   ssw[1] <- (n.obs - 1) * sum(apply(data, 2, var))
   # seq_len(n)[-1] is empty when n == 1, whereas 2:n would count down
   # (2, 1) and leave ssw longer than the x-axis passed to plot().
   for (i in seq_len(n)[-1]) ssw[i] <- sum(kmeans(data, centers = i)$withinss)
   plot(seq_len(n), ssw, type = "b", xlab = "Number of Clusters",
      ylab = "Within groups sum of squares")
   return(data.frame(No.of.clusters = seq_len(n), SSW = ssw))
}

## R square scree plot
# R.square.km(data): R-square (1 - SSW / total SS) of K-means solutions
# for k = 1, ..., nrow(data) - 1.
#   data   : numeric matrix or data frame, one observation per row
#            (at least two rows).
#   returns: data.frame with columns No.of.clusters and R.square.
#   Side effect: draws the R-square scree plot with a dashed line at 1.
R.square.km <- function(data){
   n.obs <- nrow(data)
   if (is.null(n.obs) || n.obs < 2)
      stop("'data' must have at least two rows", call. = FALSE)
   n <- n.obs - 1
   # Preallocate instead of growing the vector inside the loop.
   ssw <- numeric(n)
   # k = 1: the within-group SS is the total SS around the column means.
   ssw[1] <- (n.obs - 1) * sum(apply(data, 2, var))
   # seq_len(n)[-1] is empty when n == 1, whereas 2:n would count down
   # (2, 1) and leave ssw longer than the x-axis passed to plot().
   for (i in seq_len(n)[-1]) ssw[i] <- sum(kmeans(data, centers = i)$withinss)
   ss <- function(x) sum(scale(x, scale = FALSE)^2) # total sum of squares
   R.square <- 1 - (ssw / ss(data))
   plot(seq_len(n), R.square, type = "b", xlab = "Number of Clusters",
      ylab = "R-square")
   abline(h = 1, col = 2, lty = "dashed") # upper bound of R-square
   return(data.frame(No.of.clusters = seq_len(n), R.square = R.square))
}

網頁

2013年9月10日星期二

K-Means Clustering

沒有留言:

張貼留言

網頁

2013年9月10日 星期二

K-Means Clustering

沒有留言:

張貼留言

2013年9月10日星期二