课程设计——聚类分析的初步实践
搞完了两周的课程设计,学到了不少的知识。
1、k-means算法:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Capacity of the global sample table `clu`. */
#define max 19
/* Convergence threshold: a centroid that moved no further than this counts as unchanged. */
#define EPS 0.001
/* Number of clusters requested by the user; assigned in main(). */
int k;

/* One 4-dimensional point; used both for samples and for cluster centroids. */
typedef struct
{
    double x1, x2, x3, x4; /* coordinates */
    double ks;             /* "k_distance": distance to the closest centroid found so far */
    int tag;               /* label of the cluster the point is assigned to (centroids use 1..k) */
} cluster;

/* Global sample table, filled by getnumber(). */
cluster clu[max];
void getnumber(int n)//输入数据
{
int i;
FILE *pcluster;
if((pcluster=fopen("data.txt","r"))==NULL)
{
printf("\nopen file is failed\n");
exit(1);
}
rewind(pcluster);
for(i=0;i<n;i++)
fscanf(pcluster,"%lf%lf%lf%lf",&clu[i].x1,&clu[i].x2,&clu[i].x3,&clu[i].x4);
fclose(pcluster);
}
void output(int n)//输出数据
{
int i,j;
for(i=0;i<k;i++)
{
printf("\n第%d类:",i+1);
for (j=0;j<n;j++)
{
if (clu[j].tag==i+1)
printf(" %d",j+1);
}
putchar('\n');
}
}
/*
 * Return the squared difference (a - b)^2, the per-coordinate term of the
 * squared Euclidean distance.
 */
double square(double a, double b)
{
    /* The square is sign-independent, so no absolute value is needed. */
    const double diff = a - b;

    return diff * diff;
}
/*
 * Return the Euclidean distance between two points, using their four
 * coordinates x1..x4.  The ks and tag fields are ignored.
 */
double calculate_distance(cluster num1, cluster num2)
{
    double temp;

    temp = square(num1.x1, num2.x1) + square(num1.x2, num2.x2)
         + square(num1.x3, num2.x3) + square(num1.x4, num2.x4);
    return sqrt(temp);
}
/*
 * Convergence test: compare the k current centroids with the saved copies.
 *
 * Returns 1 when every centroid lies within EPS of its previous position,
 * otherwise 0.
 */
int equal(cluster *center, cluster *centercopy)
{
    int i;
    /* Must be a double: storing the distance in an int truncates every shift
       below 1.0 to 0 and would report convergence far too early. */
    double moved;

    for (i = 0; i < k; i++)
    {
        moved = calculate_distance(centercopy[i], center[i]);
        if (moved > EPS)
        {
            return 0;
        }
    }
    return 1;
}
/*
 * Snapshot the k current centroids into centercopy so that the next
 * iteration can be compared against them.
 */
void copycenter(cluster *center, cluster *centercopy)
{
    int idx = 0;

    while (idx < k)
    {
        centercopy[idx] = center[idx];
        ++idx;
    }
}
/*
 * Assignment step: give each of the n samples the tag of its nearest
 * centroid and record that distance in the sample's ks field.
 *
 * simple - samples to label (modified in place)
 * center - the k current centroids
 * n      - number of samples
 *
 * On a tie the centroid with the lower index wins (strict '<' comparison).
 */
void UpdateCluster(cluster *simple, cluster *center, const int n)
{
    int i, j;
    double ks;

    for (i = 0; i < n; i++)
    {
        /* Start with centroid 0 as the best candidate. */
        simple[i].ks = calculate_distance(simple[i], center[0]);
        simple[i].tag = center[0].tag;
        for (j = 1; j < k; j++)
        {
            ks = calculate_distance(simple[i], center[j]);
            /* Compare against the sample passed in, not the global table. */
            if (ks < simple[i].ks)
            {
                simple[i].ks = ks;
                simple[i].tag = center[j].tag;
            }
        }
    }
}
/*
 * Update step: move every centroid to the arithmetic mean of the samples
 * that currently carry its tag.
 *
 * simple - the n labelled samples
 * center - the k centroids (coordinates updated in place, tags untouched)
 * n      - number of samples
 *
 * A centroid with no members keeps its previous coordinates; dividing by a
 * zero member count would otherwise produce NaN coordinates.
 */
void UpdateCenter(cluster *simple, cluster *center, const int n)
{
    int i, j;

    for (i = 0; i < k; i++)
    {
        double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
        int members = 0;

        for (j = 0; j < n; j++)
        {
            if (simple[j].tag == center[i].tag)
            {
                members++;
                sum1 += simple[j].x1;
                sum2 += simple[j].x2;
                sum3 += simple[j].x3;
                sum4 += simple[j].x4;
            }
        }
        if (members == 0)
        {
            continue;
        }
        center[i].x1 = sum1 / members;
        center[i].x2 = sum2 / members;
        center[i].x3 = sum3 / members;
        center[i].x4 = sum4 / members;
    }
}
int main()
{
intcount=0,n,flag=1;//1 for false,0 for ture
int i;
printf("Please input the number of the data:\n");
scanf("%d",&n);
printf("Howmany kinds do you want:\n");
scanf("%d",&k);
clusterclu_center[k],clu_center_copy[k];
getnumber(n);
for(i=0;i<k;i++)//InitCenter
{
clu_center[i]=clu[i];
clu_center[i].tag=i+1;
}
copycenter(clu_center,clu_center_copy);
while(flag)//迭代
{
UpdateCluster(clu,clu_center,n);//将点划入新类
UpdateCenter(clu,clu_center,n);//计算新类重心
if (equal(clu_center,clu_center_copy))
{
flag=0;
}
else
{
copycenter(clu_center,clu_center_copy);
count++;//计算迭代次数
}
}
printf("\n总共迭代了%d次\n",count);
output(n);
return0;
}
2、凝聚分层聚类
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
/* Capacity of the global node table `clu`; also the size of each node's layer buffer. */
#define max 100
/* Number of tree nodes for n leaves (n leaves + n-1 merged nodes).
   Parenthesised so that it expands safely inside larger expressions. */
#define m (2*n-1)
/* Number of input samples (leaves); assigned in main(). */
int n;

/* One node of the merge tree: a leaf sample or the centre of a merged pair. */
typedef struct
{
    double x1, x2, x3, x4; /* coordinates */
    double space;          /* distance between the two children merged into this node; 0 for leaves */
    int tag;               /* 1-based node id (array index + 1) */
    int lchild;            /* array index of the left child, -1 if none */
    int rchild;            /* array index of the right child, -1 if none */
    int parent;            /* tag (index + 1) of the parent node, -1 if none */
    char layer[max];       /* '0'/'1' path from the root to this node, filled by Output() */
} cluster;

/* Global node table: leaves first, merged nodes after them. */
cluster clu[max];
void Getnumber(int n)//输入数据
{
int i;
FILE *pcluster;
if((pcluster=fopen("data.txt","r"))==NULL)
{
printf("\nopen file is failed\n");
exit(1);
}
rewind(pcluster);
for(i=0;i<n;i++)
fscanf(pcluster,"%lf%lf%lf%lf",&clu[i].x1,&clu[i].x2,&clu[i].x3,&clu[i].x4);
fclose(pcluster);
}
/*
 * Return the squared difference (a - b)^2, the per-coordinate term of the
 * squared Euclidean distance.
 */
double Square(double a, double b)
{
    /* The square is sign-independent, so no absolute value is needed. */
    const double diff = a - b;

    return diff * diff;
}
/*
 * Return the Euclidean distance between two nodes, using their four
 * coordinates x1..x4.  All other fields are ignored.
 */
double Calculate_distance(cluster num1, cluster num2)
{
    double temp;

    temp = Square(num1.x1, num2.x1) + Square(num1.x2, num2.x2)
         + Square(num1.x3, num2.x3) + Square(num1.x4, num2.x4);
    return sqrt(temp);
}
/*
 * Reset the first m (= 2n-1) nodes of data: no parent, no children, zero
 * merge distance, and tag equal to the node's array index plus one.
 */
void Initial(cluster *data)
{
    const int total = m;
    int idx = 0;

    while (idx < total)
    {
        cluster *node = &data[idx];

        node->rchild = -1;
        node->lchild = -1;
        node->parent = -1;
        node->space = 0;
        node->tag = idx + 1;
        ++idx;
    }
}
/*
 * Find the closest pair among the nodes data_rest[0..row] that have no
 * parent yet (parent == -1).
 *
 * data_rest - node table
 * row       - highest index to consider (inclusive)
 * num1      - receives the lower index of the closest pair
 * num2      - receives the higher index of the closest pair
 *
 * Returns the distance between the two nodes.  When fewer than two
 * parentless nodes exist, *num1 and *num2 are left untouched and the legacy
 * sentinel 987654321 is returned.
 */
double Space_matrix(cluster *data_rest, int row, int *num1, int *num2)
{
    const double no_pair = 987654321; /* value returned when no pair exists */
    double best = no_pair;
    int found = 0;
    int i, j;

    for (i = 0; i <= row; i++)
    {
        /* A node that already has a parent can never be part of a pair. */
        if (data_rest[i].parent != -1)
        {
            continue;
        }
        for (j = i + 1; j <= row; j++)
        {
            double dist;

            if (data_rest[j].parent != -1)
            {
                continue;
            }
            dist = Calculate_distance(data_rest[i], data_rest[j]);
            /* Always accept the first candidate so that the outputs are set
               even when every distance is at or above the sentinel. */
            if (!found || dist < best)
            {
                best = dist;
                *num1 = i;
                *num2 = j;
                found = 1;
            }
        }
    }
    return best;
}
/*
 * Place node `parent` at the midpoint of nodes `lchild` and `rchild`.
 *
 * All three arguments are indices into center.  The midpoint is the plain
 * average of the two child coordinates; it is not weighted by how many
 * leaves each child contains.
 */
void Updatacenter(cluster *center, int parent, int lchild, int rchild)
{
    center[parent].x1 = (center[lchild].x1 + center[rchild].x1) / 2;
    center[parent].x2 = (center[lchild].x2 + center[rchild].x2) / 2;
    center[parent].x3 = (center[lchild].x3 + center[rchild].x3) / 2;
    center[parent].x4 = (center[lchild].x4 + center[rchild].x4) / 2;
}
/*
 * Build the merge tree bottom-up, in the manner of Huffman tree
 * construction: for every new node i in n..m-1, pick the two closest
 * parentless nodes among 0..i-1, make them the children of node i, and
 * place node i at their midpoint.
 *
 * Children record the parent's tag (index + 1), not its index.
 */
void CreateTree(cluster *center)
{
    int i, p1, p2;

    for (i = n; i < m; i++)
    {
        /* Sentinels detect the case where no mergeable pair was found. */
        p1 = p2 = -1;
        center[i].space = Space_matrix(center, i - 1, &p1, &p2); /* the minimum space */
        if (p1 < 0 || p2 < 0)
        {
            break; /* nothing left to merge; do not index with garbage */
        }
        center[i].lchild = p1;
        center[i].rchild = p2;
        center[p1].parent = center[p2].parent = center[i].tag;
        Updatacenter(center, i, p1, p2);
    }
}
/*
 * Print the tree code of each of the n leaves.
 *
 * For every leaf the function walks up through the parent links, writing
 * '0' when the current node is its parent's left child and '1' otherwise.
 * The code is built back to front so that it reads from the root downwards,
 * stored in the leaf's layer field and printed next to the leaf's tag.
 */
void Output(cluster *T)
{
    int c, p, i;
    int start;

    if (n <= 0)
    {
        return; /* nothing to print; also keeps the array size below positive */
    }

    char cd[n + 1];

    /* Zero the buffer so cd[n] always terminates the code string. */
    memset(cd, '\0', sizeof(cd));
    for (i = 0; i < n; i++)
    {
        start = n;
        c = i;
        /* parent holds a tag (index + 1), so parent - 1 is the parent's index;
           for a node without parent this is negative and ends the walk.
           start > 0 guards the buffer against a malformed tree. */
        while (start > 0 && (p = T[c].parent - 1) >= 0)
        {
            cd[--start] = (T[p].lchild == c) ? '0' : '1';
            c = p;
        }
        /* Bounded copy: never writes past the layer buffer. */
        snprintf(T[i].layer, sizeof T[i].layer, "%s", &cd[start]);
        printf("%-10d----------%10s\n", T[i].tag, T[i].layer);
    }
}
int main()
{
printf("Please input the number of the data:\n");
scanf("%d",&n);
Getnumber(n);
Initial(clu);
CreateTree(clu);
printf("Thehuffman codes are:\n");
Output(clu);
return0;
}
以上两个算法的不足之处在于只在四维空间中实现;若要改进,可以用一个数组来存储各维坐标,从而推广到任意维空间。我就不写了,交给后来人吧。
浙公网安备 33010602011771号