模式识别c均值算法的实现(C++实现)

来源:百度文库 编辑:神马文学网 时间:2024/05/24 06:39:12
模式识别c均值算法的实现(C++实现)

实验目的:

实验原理:

实验内容:

写程序实现c均值算法,并用表中的三维数据进行测试,下面给出了每种测试的类别数目和初始值。

的结果与(3)中的结果进行比较,并解释差别,包括迭代次数的差别。

 

 

 

实验代码:

       CCMean(CData *pdata,CData *pmean);

    void work(int InitClassNum);

    void CalcuMean( int i );//计算第i类的均值

计算第i类的误差

初始化分类

将第i类样本移动到第k类中,如果返回true,则总误差变小,否则不移动

    bool MoveItoK( const CData& da, int i, int &k );

    double  dist( const CData& mean, const CData& da);

指针指向样本数据地址

指针指向初始化分类重心数据地址

各样本的误差均方根

    list< CData >* pcla[DATANUM];

CCMean::CCMean(CData *pdata)

    for(int i = 0; i < DATANUM; i ++ )

       pcla[i] = new list< CData >;

       assert( pcla[i] != 0 );

CCMean::CCMean(CData *pdata,CData *pmean)

    for(int i = 0; i < DATANUM; i ++ )

       pcla[i] = new list< CData >;

       assert( pcla[i] != 0 );

    for(int i = 0; i < DATANUM; i ++ )

    for(int i = 0; i < DATANUM; i ++ )

void CCMean::CalcuMean(int ii)

    double sum1 = 0.0, sum2 = 0.0,sum3 = 0.0;

    int si = (int)pcla[ii]->size();

    list< CData >::iterator iter = pcla[ii]->begin();

    for(int i = 0; i < si; i ++ )

          sum3 += iter->x3;

    mean[ii].x1 = (double)sum1 / si;

    mean[ii].x2 = (double)sum2 / si;

       mean[ii].x3 = (double)sum3 / si;

    for( int i = 0; i < iClassNum ; i ++ )

void CCMean::CalcuJc( int index )

    list< CData >::iterator iter = pcla[index]->begin();

    int si = (int)pcla[index]->size();

    for( int i = 0; i < si; i ++)

       jc[index] += dist( mean[index], *iter );

double CCMean::dist(const CData& mean, const CData& da)

    return (mean.x1 - da.x1)*(mean.x1 - da.x1) + (mean.x2 - da.x2)*(mean.x2 - da.x2) + (mean.x3 - da.x3)*(mean.x3 - da.x3);

       CData *pmean = pMean;

       for( int ii = 0; ii < iClassNum; ii ++ )

初始化类别重心数组

如果是没有给定初始化的分类重心,可以加上下面的这段代码

//    for( int i = 0; i < iClassNum; i ++ )

//       pcla[i]->push_back( *ptem );

    for( int i = 0; i < DATANUM; i ++ )

       double mindis = MAXDIST;

       for( int j = 0; j < iClassNum; j ++ )

           double curdis = dist( pData[i], mean[j] );

           if( curdis < mindis )

              mindis = curdis;

       pcla[pos]->push_back( pData[i] );

    for( int j = 0; j < iClassNum ; j ++ )

bool CCMean::MoveItoK( const CData &da, int i , int& k )

    for( int j = 0; j < iClassNum; j ++ )

       int si = (int)pcla[j]->size();

           Pj = dist( mean[j], da ) * si/(si - 1);

           Pj = dist( mean[j], da ) * si/(si + 1);

       else if ( Pj == Pk  && j == i )

当 Pj == Pk && j == i, 移动

    pcla[k]->push_back( da );

从第i类中删除da,但是首先从链表中找到它的位置

    list< CData >::iterator iter = pcla[i]->begin();

    while( iter != pcla[i]->end() )

       if( iter->x1 == da.x1 && iter->x2 == da.x2 && iter->x3 == da.x3 )

    pcla[i]->erase( iter );

    for( int i = 0; i < iClassNum ; i ++ )

类别

重心点为: ("<

       list< CData >::iterator iter = pcla[i]->begin();

       while( iter != pcla[i]->end() )

                 cout<<"("<x1<<","<x2<<","<x3<<")   "<

           if( j++ % 5 == 0)

             cout<

void CCMean::work(int InitClassNum)

    iClassNum = InitClassNum;

用来判断迭代是否停止

    for( int i = 0; i < iClassNum ; i ++ )

       int si = (int)pcla[i]->size();

       list< CData >::iterator iter = pcla[i]->begin();

       for(int j = 0; j < (int)pcla[i]->size(); j++)

                 CData da = *iter;

           if( MoveItoK( da , i, k ) == true )

              double OldJe = je;

              CalcuMean( i );

              CalcuMean( k );

              if( OldJe > je )

                      count++;

                  counter = 0;

                  goto Again;

           if( counter == DATANUM )

最后总误差 Je 为

迭代次数是:"<

     {-7.82,-4.58,-3.97},{-6.68,3.16,2.71},{4.36,-2.19,2.09},{6.72,0.88,2.80},{-8.64,3.06,3.50},

       {-6.87,0.57,-5.45},{4.47,-2.62,5.76},{6.73,-2.01,4.18},{-7.71,2.34,-6.33},{-6.91,-0.49,-5.68},

       {6.18,2.81,5.82},{6.72,-0.93,-4.04},{-6.25,-0.26,0.56},{-6.94,-1.22,1.13},{8.09,0.20,2.25},

       {6.18,0.17,-4.15},{-5.19,4.24,4.04},{-6.38,-1.74,1.43},{4.08,1.30,5.33},{6.27,0.93,-2.78}

// Initial cluster-centre tables for the test runs. Each array is handed to
// the CCMean constructor as its second argument (pmean) in main, e.g.
// `CCMean cmean11( yy,m11 )`. Every entry is one 3-component CData value
// (x1, x2, x3).
// NOTE(review): presumably the array length equals the class count later
// passed to work() (2, 3 or 4) — confirm against the calls in main.
// NOTE(review): m31 is not referenced by any of the main() lines visible
// here — confirm whether a fifth test was lost or never written.
CData m11[2] = {{1,1,1},{-1,1,-1}};

CData m12[2] = {{0,0,0},{1,1,-1}};

CData m21[3] = {{0,0,0},{-1,1,-1},{-1,0,2}};

CData m22[3] = {{-0.1,0,0.1},{0,-0.1,0.1},{-0.1,-0.1,0.1}};

CData m31[4] = {{-0.1,0,0.1},{0,-0.1,0.1},{-0.1,-0.1,0.1},{0.2,1,0}};

int main(int argc, char* argv[])

测试

    CCMean cmean11( yy,m11 );

测试

       CCMean cmean12( yy,m12 );

测试

       CCMean cmean21( yy,m21 );

测试

       CCMean cmean22( yy,m22 );

实验结果:


重心点为

重心点为

重心点为: (-6.83667,3.48667,3.41667)

重心点为: (-6.98286,-0.768571,-2.61571)


结果分析:初始的类别重心选择越与实际的重心接近,则迭代次数越少,划分的类别越多则迭代的次数也越少,当分成N类,则迭代次数是零。