# 肠型分析学习

## 首先下载测试数据，

```wget http://enterotyping.embl.de/MetaHIT_SangerSamples.genus.txt
wget http://enterotyping.embl.de/enterotypes_tutorial.sanger.R```

## 跑跑示例数据，排排错

```#Uncomment next two lines if R packages are already installed
#install.packages("cluster")
#install.packages("clusterSim")
library(cluster)
library(clusterSim)
#BiocManager::install("genefilter")
#setwd('<path_to_working_directory>')
data=data[-1,]dist.JSD <- function(inMatrix, pseudocount=0.000001, ...) {
KLD <- function(x,y) sum(x *log(x/y))
JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
matrixColSize <- length(colnames(inMatrix))
matrixRowSize <- length(rownames(inMatrix))
colnames <- colnames(inMatrix)
resultsMatrix <- matrix(0, matrixColSize, matrixColSize) inMatrix = apply(inMatrix,1:2,function(x) ifelse (x==0,pseudocount,x)) for(i in 1:matrixColSize) {
for(j in 1:matrixColSize) {
resultsMatrix[i,j]=JSD(as.vector(inMatrix[,i]),
as.vector(inMatrix[,j]))
}
}
colnames -> colnames(resultsMatrix) -> rownames(resultsMatrix)
as.dist(resultsMatrix)->resultsMatrix
attr(resultsMatrix, "method") <- "dist"
return(resultsMatrix)
}data.dist=dist.JSD(data)pam.clustering=function(x,k) { # x is a distance matrix and k the number of clusters
require(cluster)
cluster = as.vector(pam(as.dist(x), k, diss=TRUE)\$clustering)
return(cluster)
}data.cluster=pam.clustering(data.dist, k=3)require(clusterSim)
nclusters = index.G1(t(data), data.cluster, d = data.dist, centrotypes = "medoids")nclusters=NULLfor (k in 1:20) {
if (k==1) {
nclusters[k]=NA
} else {
data.cluster_temp=pam.clustering(data.dist, k)
nclusters[k]=index.G1(t(data),data.cluster_temp,  d = data.dist,
centrotypes = "medoids")
}
}plot(nclusters, type="h", xlab="k clusters", ylab="CH index",main="Optimal number of clusters")obs.silhouette=mean(silhouette(data.cluster, data.dist)[,3])
cat(obs.silhouette) #0.1899451#data=noise.removal(data, percent=0.01)## plot 1
obs.pca=dudi.pca(data.frame(t(data)), scannf=F, nf=10)
obs.bet=bca(obs.pca, fac=as.factor(data.cluster), scannf=F, nf=k-1)
dev.new()
s.class(obs.bet\$ls, fac=as.factor(data.cluster), grid=F,sub="Between-class analysis", col=c(3,2,4))#plot 2
obs.pcoa=dudi.pco(data.dist, scannf=F, nf=3)
dev.new()
s.class(obs.pcoa\$li, fac=as.factor(data.cluster), grid=F,sub="Principal coordiante analysis", col=c(3,2,4))```

## 上图，稍微调整下

`, col=c(3,2,4)`这个代码是给三个聚类上不同的颜色，还没搞清楚怎么给画的圈上色来实现理好看的效果，相信对于熟悉R语言的同学是小菜一碟。`, cell=0, cstar=0`是不显示圈和边线，只显示散点。 不加这两个参数，只用上面的代码，图如下：