K-means 分類算法

data

41 286 357 71 298 426 39 241 350 47 283 399 22 201 449 70 239 363 87 290 449 62 209 417 39 265 415 55 229 363 12 294 442 1 296 378 39 270 380 7 228 379 5 277 385 71 215 442 75 243 424 13 280 414 40 244 380 80 234 429 55 268 435 1 275 434 2 296 433 45 204 446 51 226 389 78 279 370 79 217 407 54 273 426 48 247 400 41 252 423 50 276 362 22 283 444 

code

#include<stdio.h>
#include<stdlib.h>
#include<stdarg.h>
#include<math.h>

#define DONE NULL
#define THRESHOLD 0.01

enum {
    N_CLUSTERS = 3,
    MAX_ITERATIONS = 25,
    MAX_DISTANCE = 100,
};

typedef struct _kmeans_t {
    size_t n;
    size_t k;
    double *X;
    double *C;
    size_t *A;
    FILE *ifp;
    FILE *ofp;
} kmeans_t;

/* funcs */
void bye (void);

size_t belongs_to (double *c, size_t nc, double *v);

FILE *getifp (const char *fn);
FILE *getofp (const char *fn);

/* tasts */
void do_tasks (kmeans_t *km, ...);

void km_pipein (kmeans_t *km);
void km_init_clusters (kmeans_t *km);
void km_reassignment (kmeans_t *km);
void km_reclustering (const kmeans_t *km, double *c);
void km_loop (kmeans_t *km);
void km_pipeout (kmeans_t *km);
void km_freeall (kmeans_t *km);


/* main */
int main (void)
{
    atexit (bye); /* we must say goodbye before we exit */

    kmeans_t km = {
        .n = 0,
        .k = 3,
        .X = NULL,
        .A = NULL,
        .ifp = getifp ("data2.txt"),
        .ofp = getofp ("data-out.txt") 
    };

    /* task to do */
    do_tasks (&km,
              km_pipein,
              km_init_clusters,
              km_loop,
              km_pipeout,
              km_freeall,
              DONE
             );

    /* exit */
    exit (EXIT_SUCCESS);
}


/* funcs */
void bye (void)
{
    printf ("bye bye\n");
    getc (stdin);
    printf ("bye~\n");
}

size_t belongs_to (double *c, size_t nc, double *v)
{
    size_t i;
    size_t a; /* anchor */

    for (i = 1, a = 0; i < nc; ++i) {
        if ( fabs (*(c+i)-*v) < fabs (*(c+a)-*v) ) {
           a = i;
        }
    }

    return a;
}


FILE *getifp (const char *fn)
{
    FILE *ifp = fopen (fn, "r");
    if (NULL == ifp) {
        fprintf (stderr, "cannot open %s for reading", fn);
        exit (EXIT_FAILURE);
    }
    return ifp;
}

FILE *getofp (const char *fn)
{
    FILE *ofp = fopen (fn, "w");
    if (NULL == ofp) {
        fprintf (stderr, "cannot open %s for writing\n", fn);
        exit (EXIT_FAILURE);
    }
    return ofp;
}

/* tasts */
void do_tasks (kmeans_t *km, ...)
{
    void (*task)() = NULL;

    va_list tasks;
    va_start (tasks, km);
    while (DONE != (
                    task = va_arg ( tasks, void(*)() ) 
                   )
          ) {
        (*task)(km);
    }
    return;
}

void km_pipein (kmeans_t *km)
{
    double tmp;
    size_t n;
    
    while (fscanf (km->ifp, "%lf", &tmp) == 1) ++(km->n);
    rewind (km->ifp);

    km->X = (double *) malloc (km->n*sizeof(double));
    for (n = 0; n < km->n; ++n) {
        fscanf (km->ifp, "%lf", km->X+n);
    }
    return;
}

void km_init_clusters (kmeans_t *km)
{
    size_t k;

    km->C = (double *)  malloc (km->k*sizeof(double));
    km->A = (size_t *)  malloc (km->n*sizeof(size_t));

    for (k = 0; k < km->k; ++k) {
        *(km->C+k) = *(km->X+k);
    }
    return;
}

void km_reassignment (kmeans_t *km)
{
    size_t n;

    for (n = 0; n < km->n; ++n) {
        *(km->A+n) = belongs_to (km->C, km->k, km->X+n);
    }
}


void km_reclustering (const kmeans_t *km, double *c)
{
    size_t n, k;

    for (k = 0; k < km->k; ++k) {
        *(c+k) = 0.0;
    }

    for (n = 0; n < km->n; ++n) {
        *(c+*(km->A+n)) += *(km->X+n);
    }

    for (k = 0; k < km->k; ++k) {
        *(c+k) /= (double)km->k;
    }

    return;
}

void km_loop (kmeans_t *km)
{
    size_t n;
    size_t k;
    size_t i;

    double diff;

    double *c = (double *) malloc (km->k*sizeof(double));

    km_reassignment (km);

    /* loops */
    for (i = 0; i < MAX_ITERATIONS; ++i) {
        km_reclustering (km, c);
        diff = 0.0;
        for (k = 0; k < km->k; ++k) {
            diff += fabs (*(km->C+k) - *(c+k));
        }

        if (diff < THRESHOLD) {
            break;
        }

        km_reassignment (km);
    } // end for

    free (c);

    return;
}

void km_pipeout (kmeans_t *km)
{
    size_t n;
    size_t k;
    
    fprintf (km->ofp, "--overall:\n");
    for (n = 0; n < km->n; ++n) {
        fprintf (km->ofp, "%10sP[%2d]: %10.4lf, cluster: %4d\n", "",
                 n,
                 *(km->X+n),
                 *(km->A+n)
                );
    }

    for (k = 0; k < km->k; ++k) {
        fprintf (km->ofp, "%10sC[%2d]: center: %10.4lf\n", "",
                 k,
                 *(km->C+k)
                );
    }

    return;
}

void km_freeall (kmeans_t *km)
{
    fclose (km->ifp);
    fclose (km->ofp);

    free (km->X);
    free (km->A);
    free (km->C);

    return;
}

GCC 編譯結果

--overall:
          P[ 0]:    41.0000, cluster:    0
          P[ 1]:   286.0000, cluster:    1
          P[ 2]:   357.0000, cluster:    2
          P[ 3]:    71.0000, cluster:    0
          P[ 4]:   298.0000, cluster:    1
          P[ 5]:   426.0000, cluster:    2
          P[ 6]:    39.0000, cluster:    0
          P[ 7]:   241.0000, cluster:    1
          P[ 8]:   350.0000, cluster:    2
          P[ 9]:    47.0000, cluster:    0
          P[10]:   283.0000, cluster:    1
          P[11]:   399.0000, cluster:    2
          P[12]:    22.0000, cluster:    0
          P[13]:   201.0000, cluster:    1
          P[14]:   449.0000, cluster:    2
          P[15]:    70.0000, cluster:    0
          P[16]:   239.0000, cluster:    1
          P[17]:   363.0000, cluster:    2
          P[18]:    87.0000, cluster:    0
          P[19]:   290.0000, cluster:    1
          P[20]:   449.0000, cluster:    2
          P[21]:    62.0000, cluster:    0
          P[22]:   209.0000, cluster:    1
          P[23]:   417.0000, cluster:    2
          P[24]:    39.0000, cluster:    0
          P[25]:   265.0000, cluster:    1
          P[26]:   415.0000, cluster:    2
          P[27]:    55.0000, cluster:    0
          P[28]:   229.0000, cluster:    1
          P[29]:   363.0000, cluster:    2
          P[30]:    12.0000, cluster:    0
          P[31]:   294.0000, cluster:    1
          P[32]:   442.0000, cluster:    2
          P[33]:     1.0000, cluster:    0
          P[34]:   296.0000, cluster:    1
          P[35]:   378.0000, cluster:    2
          P[36]:    39.0000, cluster:    0
          P[37]:   270.0000, cluster:    1
          P[38]:   380.0000, cluster:    2
          P[39]:     7.0000, cluster:    0
          P[40]:   228.0000, cluster:    1
          P[41]:   379.0000, cluster:    2
          P[42]:     5.0000, cluster:    0
          P[43]:   277.0000, cluster:    1
          P[44]:   385.0000, cluster:    2
          P[45]:    71.0000, cluster:    0
          P[46]:   215.0000, cluster:    1
          P[47]:   442.0000, cluster:    2
          P[48]:    75.0000, cluster:    0
          P[49]:   243.0000, cluster:    1
          P[50]:   424.0000, cluster:    2
          P[51]:    13.0000, cluster:    0
          P[52]:   280.0000, cluster:    1
          P[53]:   414.0000, cluster:    2
          P[54]:    40.0000, cluster:    0
          P[55]:   244.0000, cluster:    1
          P[56]:   380.0000, cluster:    2
          P[57]:    80.0000, cluster:    0
          P[58]:   234.0000, cluster:    1
          P[59]:   429.0000, cluster:    2
          P[60]:    55.0000, cluster:    0
          P[61]:   268.0000, cluster:    1
          P[62]:   435.0000, cluster:    2
          P[63]:     1.0000, cluster:    0
          P[64]:   275.0000, cluster:    1
          P[65]:   434.0000, cluster:    2
          P[66]:     2.0000, cluster:    0
          P[67]:   296.0000, cluster:    1
          P[68]:   433.0000, cluster:    2
          P[69]:    45.0000, cluster:    0
          P[70]:   204.0000, cluster:    1
          P[71]:   446.0000, cluster:    2
          P[72]:    51.0000, cluster:    0
          P[73]:   226.0000, cluster:    1
          P[74]:   389.0000, cluster:    2
          P[75]:    78.0000, cluster:    0
          P[76]:   279.0000, cluster:    1
          P[77]:   370.0000, cluster:    2
          P[78]:    79.0000, cluster:    0
          P[79]:   217.0000, cluster:    1
          P[80]:   407.0000, cluster:    2
          P[81]:    54.0000, cluster:    0
          P[82]:   273.0000, cluster:    1
          P[83]:   426.0000, cluster:    2
          P[84]:    48.0000, cluster:    0
          P[85]:   247.0000, cluster:    1
          P[86]:   400.0000, cluster:    2
          P[87]:    41.0000, cluster:    0
          P[88]:   252.0000, cluster:    1
          P[89]:   423.0000, cluster:    2
          P[90]:    50.0000, cluster:    0
          P[91]:   276.0000, cluster:    1
          P[92]:   362.0000, cluster:    2
          P[93]:    22.0000, cluster:    0
          P[94]:   283.0000, cluster:    1
          P[95]:   444.0000, cluster:    2
          C[ 0]: center:    41.0000
          C[ 1]: center:   286.0000
          C[ 2]: center:   357.0000

VS2010 編譯

沒有結果,已花樣作死,死絕。

我不應該亂玩函數指針。。。


log: $1: 考研復試階段;
最后編輯于
?著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容