/* C - Program to Implement Single Pass Algorithm for Clustering */
#include<ctype.h>
#include<stdio.h>
#include<string.h>
/* A list of words. Used for each loaded document (doc[]) and, with only
   word[0] filled in, for each stop-word entry (stopword[]). */
struct a
{
/* Up to 50 words, each held in a 20-byte character buffer. */
char word[50][20];
/* Number of entries of word[] in use; access() sets it for documents. */
int nword;
};
/* One cluster slot; printmat() keeps one slot per document index. */
struct c
{
/* Zero-based document indices placed in this cluster (printed 1-based). */
int cluster[5];
/* 1 once printmat() has marked this slot as a cluster of its own, else 0. */
int set;
/* Number of entries of cluster[] in use. */
int count;
};
/* Cluster slots, reset and filled by printmat(). */
struct c clust[5];
/* Documents loaded by access(), with stop words already removed. */
struct a doc[5];
/* Stop words loaded by getstopword(); only word[0] of each entry is used. */
struct a stopword[24];
/* flist: list of document file names; f1: document currently being read;
   stop: stop-word file. */
FILE *flist,*f1,*stop;
/* Scratch buffers: buffer receives a file name read from flist, buffer1 a
   word read from a document. */
char buffer[15],buffer1[15];
/* Shared loop counters and flags used by several functions. wcount is the
   number of stop words read by getstopword(). matc, chflag and ch are not
   referenced anywhere in the visible code. */
int i,n,k,j,wcount,flag=0,matc=0,chflag,ch;
/* Pairwise document similarity. cluster() writes only the entries whose
   column index is smaller than the row index. */
float mat[5][5];
/* Return 1 when a and b are equal ignoring letter case, otherwise 0. */
static int same_word_nocase(const char *a, const char *b)
{
    while (*a != '\0' && *b != '\0') {
        /* Cast through unsigned char: tolower() is undefined for negative
           char values. */
        if (tolower((unsigned char)*a) != tolower((unsigned char)*b)) {
            return 0;
        }
        a++;
        b++;
    }
    return *a == *b;
}

/*
 * Load the documents named in flist into doc[], dropping stop words.
 *
 * flist is read as a sequence of whitespace-separated file names. Each named
 * file is opened and read word by word; a word is kept unless it equals
 * (ignoring case) one of the first wcount entries of stopword[]. Kept words
 * are stored in doc[k].word[] and their number in doc[k].nword.
 *
 * Limits: reading stops once doc[] is full, and a document stops being read
 * once its word table is full. File names and words longer than 14
 * characters are split, because buffer and buffer1 hold 15 bytes each.
 *
 * A document that cannot be opened is reported on stderr and keeps its slot
 * with zero words, so document numbers still follow the order in flist.
 *
 * On return the file-scope counter k holds the number of slots filled and
 * flist has been closed. A NULL flist is reported and ignored.
 */
void access(FILE *flist)
{
    const int max_docs = (int)(sizeof doc / sizeof doc[0]);
    const int max_words = (int)(sizeof doc[0].word / sizeof doc[0].word[0]);
    int nwords;
    int s;
    int is_stop;

    if (flist == NULL) {
        fprintf(stderr, "access: document list is not open\n");
        return;
    }

    k = 0;
    /* Test the fscanf result instead of feof(): feof() only turns true after
       a read has already failed, which made the last entry run twice. */
    while (k < max_docs && fscanf(flist, "%14s", buffer) == 1) {
        nwords = 0;
        f1 = fopen(buffer, "r");
        if (f1 == NULL) {
            fprintf(stderr, "access: cannot open document '%s'\n", buffer);
        } else {
            while (nwords < max_words && fscanf(f1, "%14s", buffer1) == 1) {
                is_stop = 0;
                for (s = 0; s < wcount; s++) {
                    if (same_word_nocase(buffer1, stopword[s].word[0])) {
                        is_stop = 1;
                        break;
                    }
                }
                if (!is_stop) {
                    /* buffer1 holds at most 14 characters, so it always fits
                       the 20-byte word slot. */
                    strcpy(doc[k].word[nwords], buffer1);
                    nwords++;
                }
            }
            fclose(f1);
            f1 = NULL;
        }
        doc[k].nword = nwords;
        k++;
    }
    fclose(flist);
}
/*
 * Print the stored words of all five document slots, one document per
 * section, then call getch() (presumably to wait for a key press).
 * Uses the file-scope counters i, j and n as loop state.
 */
void displaydoc()
{
    printf("\n\n\n");

    i = 0;
    while (i < 5) {
        printf("\n Document %d\n\n", i + 1);

        /* Words of the current document, separated by single spaces. */
        n = doc[i].nword;
        j = 0;
        while (j < n) {
            printf("%s ", doc[i].word[j]);
            j++;
        }

        printf("\n\n\n");
        i++;
    }

    getch();
}
/*
 * Load the stop-word list from d:\sp\word.txt into stopword[].
 *
 * The file is read as whitespace-separated words; each word is stored in
 * word[0] of the next stopword[] entry. Reading stops at end of file or when
 * stopword[] is full. Words longer than 19 characters are split so they fit
 * the 20-byte slot.
 *
 * On return wcount (and the file-scope counter k) hold the number of stop
 * words read. If the file cannot be opened, a message is written to stderr
 * and wcount is left at 0.
 */
void getstopword()
{
    const int max_stop = (int)(sizeof stopword / sizeof stopword[0]);

    k = 0;
    wcount = 0;
    stop = fopen("d:\\sp\\word.txt", "r");
    if (stop == NULL) {
        fprintf(stderr, "getstopword: cannot open d:\\sp\\word.txt\n");
        return;
    }

    /* Test the fscanf result instead of feof(): feof() only turns true after
       a read has already failed, which counted one phantom entry. */
    while (k < max_stop && fscanf(stop, "%19s", stopword[k].word[0]) == 1) {
        k++;
    }

    fclose(stop);
    stop = NULL;
    wcount = k;
}
/*
 * Compute and print the pairwise similarity of the five document slots.
 *
 * For every pair (row, col) with col < row, the similarity is the number of
 * matching word pairs (exact strcmp equality; repeated words count once per
 * pairing) divided by the sum of the two documents' word counts. The value
 * is printed and stored in mat[row][col]; other entries of mat are untouched.
 *
 * When both documents are empty the similarity is defined as 0 rather than
 * dividing by zero, which previously produced NaN.
 *
 * Clears the screen first via clrscr() and calls getch() before returning.
 */
void cluster()
{
    enum { NUM_DOCS = 5 }; /* matches the dimensions of doc[] and mat[][] */
    int row, col, a, b;
    int shared, total;
    float result;

    clrscr();
    printf("\n\n\n\t\t\tCluster classification\n");

    for (row = 0; row < NUM_DOCS; row++) {
        const int row_words = doc[row].nword;

        printf("%d ", row + 1);
        for (col = 0; col < row; col++) {
            const int col_words = doc[col].nword;

            shared = 0;
            for (a = 0; a < row_words; a++) {
                for (b = 0; b < col_words; b++) {
                    if (strcmp(doc[row].word[a], doc[col].word[b]) == 0) {
                        shared++;
                    }
                }
            }

            total = row_words + col_words;
            result = (total > 0) ? (float)shared / total : 0.0f;
            printf("%f\t", result);
            mat[row][col] = result;
        }
        printf("\n\n");
    }

    /* Column labels under the triangular table. */
    printf("\n\n%4d %12d %15d %15d %15d", 1, 2, 3, 4, 5);
    getch();
}
void printmat()
{
int cn,clustno=0,i,rowflag;
for(i=0;i<5;i++)
{
clust[i].set=0;
clust[i].count=0;
}
clust[0].set=1;
for(i=0;i<5;i++)
{
rowflag=0;
for(j=0;j<i;j++)
{
if(mat[i][j]>=0.25)
{
if(clust[j].set==1)
{
clust[j].cluster[clust[j].count]=i;
clust[j].count++;
rowflag=1;
}
else
{
clust[i].set=1;
clust[i].cluster[clust[i].count]=i;
clust[i].count++;
rowflag=1;
}
}
}
if(rowflag==0)
{
clust[i].set=1;
clust[i].cluster[clust[i].count]=i;
clust[i].count++;
}
}
printf("\n\nCluster of Documents are\n\n");
for(i=0;i<5;i++)
{
for(j=0;j<clust[i].count;j++)
{
printf("%d ",clust[i].cluster[j]+1);
}
printf("\n");
}
}
/*
 * Program entry point: load the stop words and documents, show the
 * documents, print the similarity matrix and finally the clusters.
 * getch() is called between stages, as in the original flow.
 *
 * Returns 0 on success, 1 if the document list file cannot be opened.
 */
int main(void)
{
    clrscr();

    flist = fopen("d:\\sp\\flist.txt", "r");
    if (flist == NULL) {
        fprintf(stderr, "cannot open d:\\sp\\flist.txt\n");
        return 1;
    }

    getstopword();   /* read the stop-word list */
    getch();
    access(flist);   /* read the documents and remove stop words */
    flist = NULL;    /* access() closes the stream it is given */
    getch();
    displaydoc();    /* show the document contents */
    getch();
    cluster();       /* build and print the similarity matrix */
    getch();
    printmat();      /* form and print the clusters */
    getch();

    return 0;
}