gopher-clustering.c - brcon2024-hackathons - Bitreichcon 2024 Hackathons
 (HTM) git clone git://bitreich.org/brcon2024-hackathons git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/brcon2024-hackathons
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) Submodules
       ---
       gopher-clustering.c (48223B)
       ---
            1 #define _POSIX_C_SOURCE 200112L
            2 
            3 #include <assert.h>
            4 #include <stdarg.h>
            5 #include <stdint.h>
            6 #include <stdio.h>
            7 #include <stdlib.h>
            8 #include <string.h>
            9 
           10 #include <math.h>
           11 
           12 #include <netdb.h>
           13 #include <sys/socket.h>
           14 #include <sys/types.h>
           15 #include <termios.h>
           16 #include <unistd.h>
           17 
           18 uint32_t
           19 fnv1a(int n,...)
           20 {
           21         int i;
           22         char *s;
           23         va_list l;
           24         uint32_t h;
           25 
           26         h = 0x811c9dc5;
           27 
           28         va_start(l, n); 
           29         for (i = 0; i < n; i++) {
           30                 for (s = va_arg(l, char*); *s; s++) {
           31                         h ^= *s;
           32                         h *= 0x01000193;
           33                 }
           34         }
           35         va_end(l);
           36 
           37         return h;
           38 }
           39 
           40 uint32_t
           41 xorshift(uint32_t *s)
           42 {
           43         *s ^= *s << 13;
           44         *s ^= *s >> 17;
           45         *s ^= *s << 5;
           46         return *s;
           47 }
           48 
           49 /*
           50         https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785
           51 */
           52 uint32_t
           53 mix(uint32_t x)
           54 {
           55         x ^= x >> 16;
           56         x *= 0x21f0aaad;
           57         x ^= x >> 15;
           58         x *= 0x735a2d97;
           59         x ^= x >> 16;
           60         return x;
           61 }
           62 
           63 struct cell {
           64         void *data;
           65         char c;
           66 };
           67 
           68 #define MAPHEIGHT (50)
           69 #define MAPWIDTH (160)
           70 struct cell map[MAPHEIGHT][MAPWIDTH];
           71 
           72 struct gopherentry {
           73         struct gopherentry *next;
           74         char type;
           75         char *fields[4];
           76         size_t i, s;
           77 };
           78 
           79 char *
           80 gopher_fieldend(char *s)
           81 {
           82         for (; *s; s++) {
           83                 if (*s == '\t')
           84                         break;
           85                 if (*s == '\r' && *(s+1) == '\n')
           86                         break;
           87                 if (*s == '\n')
           88                         break;
           89         }
           90         return s;
           91 }
           92 
           93 char *
           94 gopher_nextentry(char *s)
           95 {
           96         char *c;
           97 
           98         if (c = strchr(s, '\n'))
           99                 return c + 1;
          100         return NULL;
          101 }
          102 
          103 void *
          104 xmalloc(size_t s)
          105 {
          106         void *p;
          107 
          108         if (!(p = malloc(s))) {
          109                 perror(NULL);
          110                 exit(1);
          111         }
          112         return p;
          113 }
          114 
          115 void *
          116 xrealloc(void *p, size_t s)
          117 {
          118         if (!(p = realloc(p, s))) {
          119                 perror(NULL);
          120                 exit(1);
          121         }
          122         return p;
          123 }
          124 
          125 /* Allow a lot of ugly things... */
          126 size_t
          127 gopher_parsemenu(char *d, struct gopherentry **g)
          128 {
          129         ssize_t gl;
          130         size_t fl, i, s;
          131         char *c, *p, pt;
          132         struct gopherentry *cg;
          133 
          134         gl = 0;
          135         cg = NULL;
          136 
          137         s = 0;
          138         pt = 0;
          139         for (c = d; c && *c; c = gopher_nextentry(c)) {
          140                 if (*c == '.')
          141                         break;
          142                 if (*c == '\n')
          143                         continue;
          144 
          145                 gl++;
          146                 cg = xrealloc(cg, gl * sizeof(struct gopherentry));
          147                 memset(&cg[gl-1], 0, sizeof(struct gopherentry));
          148 
          149                 if (*c != 'i' && pt == 'i')
          150                         s++;
          151                 pt = *c;
          152 
          153                 cg[gl-1].type = *c;
          154                 cg[gl-1].i = gl-1;
          155                 cg[gl-1].s = s;
          156                 c++;
          157 
          158                 for (i = 0; i < 4; i++) {
          159                         p = gopher_fieldend(c);
          160                         fl = p - c;
          161                         cg[gl-1].fields[i] = xmalloc(fl + 1);
          162                         memcpy(cg[gl-1].fields[i], c, fl);
          163                         cg[gl-1].fields[i][fl] = '\0';
          164                         if (*p != '\t')
          165                                 break;
          166                         c = p + 1;
          167                 }
          168         }
          169         s++;
          170         *g = cg;
          171         return gl;
          172 }
          173 
          174 ssize_t
          175 gopher_removeentries(struct gopherentry *g, size_t l)
          176 {
          177         size_t i, j;
          178 
          179         for (i = 0; i < l;) {
          180                 if ((g[i].type == 'i' && g[i].type == '3') || !g[i].fields[1] || !g[i].fields[2] || !g[i].fields[3]) {
          181                         l--;
          182                         memmove(&g[i], &g[i+1], (l - i) * sizeof(*g));
          183                         continue;
          184                 }
          185                 for (j = 0; j < i; j++) {
          186                         if (!strcmp(g[i].fields[1], g[j].fields[1]) &&
          187                             !strcmp(g[i].fields[2], g[j].fields[2]) &&
          188                             !strcmp(g[i].fields[3], g[j].fields[3]))
          189                                 break;
          190                 }
          191                 if (j < i) {
          192                         l--;
          193                         memmove(&g[i], &g[i+1], (l - i) * sizeof(*g));
          194                         continue;
          195                 }
          196                 i++;
          197         }
          198 
          199         return l;
          200 }
          201 
          202 ssize_t
          203 gopher_request(char *host, char *port, char *selector, char *buffer, size_t l)
          204 {
          205         int fd;
          206         struct addrinfo hints;
          207         struct addrinfo *r, *rp;
          208         ssize_t n, result, rn;
          209 
          210         memset(&hints, 0, sizeof(hints));
          211         hints.ai_family = AF_UNSPEC;
          212         hints.ai_socktype = SOCK_STREAM;
          213 
          214         if (getaddrinfo(host, port, &hints, &r) != 0)
          215                 return -1;
          216 
          217         for (rp = r; rp; rp = rp->ai_next) {
          218                 if ((fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol)) == -1)
          219                         continue;
          220                 if (connect(fd, rp->ai_addr, rp->ai_addrlen) != -1)
          221                         break;
          222                 close(fd);
          223         }
          224         freeaddrinfo(r);
          225         if (!rp)
          226                 return -1;
          227 
          228         result = -1;
          229         if (write(fd, selector, strlen(selector)) != strlen(selector))
          230                 goto cleanup;
          231 
          232         if (write(fd, "\r\n", 2) != 2)
          233                 goto cleanup;
          234 
          235         n = 0;
          236         while (1) {
          237                 if (n + 512 > l - 1)
          238                         goto cleanup;
          239                 if ((rn = read(fd, &buffer[n], 512)) == -1)
          240                         goto cleanup;
          241                 else if (!rn)
          242                         break;
          243                 n += rn;
          244         }
          245 
          246         buffer[n] = '\0';
          247         result = n;
          248 
          249 cleanup:
          250         close(fd);
          251 
          252         return result;
          253 }
          254 
          255 size_t
          256 triangularnumber(size_t n)
          257 {
          258         return (n * n + n) / 2;
          259 }
          260 
          261 size_t
          262 distanceindex(size_t i, size_t j)
          263 {
          264         size_t t;
          265 
          266         if (i < j) {
          267                 t = i;
          268                 i = j;
          269                 j = t;
          270         }
          271 
          272         return triangularnumber(i) + j;
          273 }
          274 
          275 /*
          276         https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
          277         Which references https://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm-2
          278         The code there is using the CPOL: https://www.codeproject.com/info/cpol10.aspx
          279 */
          280 size_t
          281 levenshteindistance(char *a, char *b)
          282 {
          283         size_t i, j, dc, ic, sc, r;
          284         size_t *v0, *v1, *t;
          285 
          286         v0 = xmalloc((strlen(b) + 1) * sizeof(*v0));
          287         v1 = xmalloc((strlen(b) + 1) * sizeof(*v1));
          288 
          289         for (i = 0; i <= strlen(b); i++)
          290                 v0[i] = i;
          291 
          292         for (i = 0; i < strlen(a); i++) {
          293                 v1[0] = i + 1;
          294                 for (j = 0; j < strlen(b); j++) {
          295                         dc = v0[j + 1] + 1;
          296                         ic = v1[j] + 1;
          297                         if (a[i] == b[j])
          298                                 sc = v0[j];
          299                         else
          300                                 sc = v0[j] + 1;
          301                         if (dc < ic && dc < sc)
          302                                 v1[j + 1] = dc;
          303                         else if (ic < dc && ic < sc)
          304                                 v1[j + 1] = ic;
          305                         else
          306                                 v1[j + 1] = sc;
          307                 }
          308                 t = v0;
          309                 v0 = v1;
          310                 v1 = t;
          311         }
          312 
          313         r = v0[strlen(b)];
          314 
          315         free(v1);
          316         free(v0);
          317 
          318         return r;
          319 }
          320 
          321 /*
          322         Remove common prefix and suffix. The pazz0distance is the sum of the length of the leftover strings.
          323 */
          324 size_t
          325 pazz0distance(char *a, char *b)
          326 {
          327         size_t pl, sl;
          328         size_t al, bl;
          329 
          330         al = strlen(a);
          331         bl = strlen(b);
          332 
          333         for (pl = 0; pl < al && pl < bl && a[pl] == b[pl]; pl++); 
          334         for (sl = 0; al - sl > pl && bl - sl > pl && a[al - 1 - sl] == b[bl - 1 - sl]; sl++);
          335 
          336         return al + bl - 2 * (pl + sl);
          337 }
          338 
          339 struct point {
          340         double dims[2];
          341 };
          342 
          343 void
          344 sammon(double *distances, size_t l, uint32_t prng, struct point *points)
          345 {
          346         size_t i, j, k, t, tmax;
          347         double *gnarf;
          348         double c, lr;
          349         double e1, e2, sum, lasterror;
          350         struct point *newpoints;
          351 
          352         gnarf = calloc(triangularnumber(l), sizeof(*gnarf));
          353         newpoints = calloc(l, sizeof(*newpoints));
          354 
          355         for (i = 0; i < l; i++) {
          356                 points[i].dims[0] = (double)xorshift(&prng) / UINT32_MAX - 0.5;
          357                 points[i].dims[1] = (double)xorshift(&prng) / UINT32_MAX - 0.5;
          358         }
          359 
          360         c = 0;
          361         for (i = 0; i < l; i++) {
          362                 gnarf[distanceindex(i, i)] = 0;
          363                 for (j = 0; j < i; j++) {
          364                         gnarf[distanceindex(i, j)] = sqrt(pow(points[i].dims[0] - points[j].dims[0], 2) + pow(points[i].dims[1] - points[j].dims[1], 2));
          365                         c += distances[distanceindex(i, j)];
          366                 }
          367         }
          368 
          369         lasterror = 0;
          370         for (t = 0, tmax = 1 << 12; t < tmax; t++) {
          371                 lr = 0.01;
          372                 for (i = 0; i < l; i++) {
          373                         for (j = 0; j < 2; j++) {
          374                                 e1 = -2. / c;
          375                                 sum = 0;
          376                                 for (k = 0; k < l; k++) {
          377                                         if (i == k)
          378                                                 continue;
          379                                         sum += (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * (points[i].dims[j] - points[k].dims[j]);
          380                                 }
          381                                 e1 *= sum;
          382 
          383                                 e2 = -2. / c;
          384                                 sum = 0;
          385                                 for (k = 0; k < l; k++) {
          386                                         if (i == k)
          387                                                 continue;
          388                                         sum += 1. / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * ((distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) - pow(points[i].dims[j] - points[k].dims[j], 2) / gnarf[distanceindex(i, k)] * (1 + (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / gnarf[distanceindex(i, k)]));
          389                                 }
          390                                 e2 *= sum;
          391 
          392                                 newpoints[i].dims[j] = points[i].dims[j] - lr * (e1 / fabs(e2));
          393                         }
          394                 }
          395 
          396                 sum = 0;
          397                 for (i = 0; i < l; i++) {
          398                         for (j = 0; j < i; j++) {
          399                                 gnarf[distanceindex(i, j)] = sqrt(pow(newpoints[i].dims[0] - newpoints[j].dims[0], 2) + pow(newpoints[i].dims[1] - newpoints[j].dims[1], 2));
          400                                 sum += pow(distances[distanceindex(i, j)] - gnarf[distanceindex(i, j)], 2) / distances[distanceindex(i, j)];
          401                         }
          402                 }
          403 printf("%lf\n", sum / c);
          404 if (t > 0 && fabs(sum / c - lasterror) < 0.000000000001)
          405         break;
          406 lasterror = sum / c;
          407 
          408                 memcpy(points, newpoints, l * sizeof(*points));
          409         }
          410 
          411         free(newpoints);
          412         free(gnarf);
          413 }
          414 
          415 int
          416 dynmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
          417 {
          418         size_t i, j, n, ck;
          419         ssize_t m1, m, m2;
          420         size_t ti, th, huh;
          421         size_t iter;
          422         double dsum, d, d2, gnarf, tdsum;
          423         struct {
          424                 size_t medoid;
          425                 size_t n;
          426                 double dsum;
          427         } *clusters;
          428         struct {
          429                 size_t n1, n2, n3;
          430                 double d1, d2, d3;
          431         } *cache;
          432         double *dS, *dS2;
          433         double cdS, cdS2;
          434         size_t cm, cx;
          435         ssize_t lx;
          436         int flag, result;
          437         uint32_t r;
          438         double *C;
          439 
          440         if (!l)
          441                 return 0;
          442 
          443         if (l < k)
          444                 k = l;
          445 
          446         clusters = calloc(k, sizeof(*clusters));
          447         if (!clusters)
          448                 return -1;
          449 
          450         result = -1;
          451 
          452 /*
          453         Seed the medoids randomly
          454 */
          455         for (i = n = 0; i < l; i++) {
          456                 if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
          457                         clusters[n++].medoid = i;
          458                 if (n == k)
          459                         break;
          460         }
          461 
          462         cache = calloc(l, sizeof(*cache));
          463         if (!cache)
          464                 goto cleanup;
          465 
          466         dS = calloc(k, sizeof(*dS));
          467         if (!dS)
          468                 goto cleanup;
          469 
          470         dS2 = calloc(k, sizeof(*dS2));
          471         if (!dS2)
          472                 goto cleanup;
          473 
          474 while (k > 3) {
          475         for (i = 0; i < l; i++) {
          476                 cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
          477                 for (j = 0; j < k; j++) {
          478                         d = distances[distanceindex(i, clusters[j].medoid)];
          479                         if (d < cache[i].d1) {
          480                                 cache[i].d3 = cache[i].d2;
          481                                 cache[i].d2 = cache[i].d1;
          482                                 cache[i].d1 = d;
          483                                 cache[i].n3 = cache[i].n2;
          484                                 cache[i].n2 = cache[i].n1;
          485                                 cache[i].n1 = j;
          486                         } else if (d < cache[i].d2) {
          487                                 cache[i].d3 = cache[i].d2;
          488                                 cache[i].d2 = d;
          489                                 cache[i].n3 = cache[i].n2;
          490                                 cache[i].n2 = j;
          491                         } else if (d < cache[i].d3) {
          492                                 cache[i].d3 = d;
          493                                 cache[i].n3 = j;
          494                         }
          495                 }
          496         }
          497 
          498         for (i = 0; i < k; i++) {
          499                 dS[i] = 0;
          500                 for (j = 0; j < l; j++) {
          501                         if (cache[j].n1 != i)
          502                                 continue;
          503                         dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
          504                 }
          505                 for (j = 0; j < l; j++) {
          506                         if (cache[j].n2 != i)
          507                                 continue;
          508                         dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
          509                 }
          510         }
          511 
          512         lx = -1;
          513         for (iter = 0; iter < 100; iter++) {
          514                 cdS = 0;
          515                 for (i = 0; i < l; i++) {
          516                         if (i == lx)
          517                                 goto fastermsc_finished;
          518 
          519                         for (j = 0; j < k; j++) {
          520                                 if (clusters[j].medoid == i)
          521                                         goto fastmsc_nexti;
          522                         }
          523 
          524                         memcpy(dS2, dS, k * sizeof(*dS2));
          525                         cdS2 = 0;
          526                         for (j = 0; j < l; j++) {
          527                                 d = distances[distanceindex(i, j)];
          528                                 if (d < cache[j].d1) {
          529                                         cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
          530                                         dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; 
          531                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; 
          532                                 } else if (d < cache[j].d2) {
          533                                         cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
          534                                         dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; 
          535                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; 
          536                                 } else if (d < cache[j].d3) {
          537                                         dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; 
          538                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; 
          539                                 }
          540                         }
          541                         d = -INFINITY;
          542                         for (j = 0; j < k; j++) {
          543                                 if (dS2[j] > d) {
          544                                         d = dS2[j];
          545                                         m = j;
          546                                 }
          547                         }
          548                         dS2[m] += cdS2;
          549                         if (dS2[m] <= 0)
          550                                 continue;
          551 //printf("%lf\t%lf\n", dS2[m], cdS2);
          552 printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i);
          553                         lx = i;
          554                         clusters[m].medoid = i;
          555                         for (j = 0; j < l; j++) {
          556                                 if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m)
          557                                         continue;
          558                                 cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY;
          559                                 for (n = 0; n < k; n++) {
          560                                         d = distances[distanceindex(j, clusters[n].medoid)];
          561                                         if (d < cache[j].d1) {
          562                                                 cache[j].d3 = cache[j].d2;
          563                                                 cache[j].d2 = cache[j].d1;
          564                                                 cache[j].d1 = d;
          565                                                 cache[j].n3 = cache[j].n2;
          566                                                 cache[j].n2 = cache[j].n1;
          567                                                 cache[j].n1 = n;
          568                                         } else if (d < cache[j].d2) {
          569                                                 cache[j].d3 = cache[j].d2;
          570                                                 cache[j].d2 = d;
          571                                                 cache[j].n3 = cache[j].n2;
          572                                                 cache[j].n2 = n;
          573                                         } else if (d < cache[j].d3) {
          574                                                 cache[j].d3 = d;
          575                                                 cache[j].n3 = n;
          576                                         }
          577                                 }
          578                         }
          579                         for (j = 0; j < k; j++) {
          580                                 dS[j] = 0;
          581                                 for (n = 0; n < l; n++) {
          582                                         if (cache[n].n1 != j)
          583                                                 continue;
          584                                         dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3;
          585                                 }
          586                                 for (n = 0; n < l; n++) {
          587                                         if (cache[n].n2 != j)
          588                                                 continue;
          589                                         dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3;
          590                                 }
          591                         }
          592 printf("lx = %lu\n", i);
          593 fastmsc_nexti:;
          594                 }
          595                 if (lx == -1)
          596                         break;
          597         }
          598 fastermsc_finished:;
          599         d = INFINITY;
          600         for (j = 0; j < k; j++) {
          601                 if (dS2[j] < d) {
          602                         d = dS2[j];
          603                         m = j;
          604                 }
          605         }
          606         if (m < k - 1)
          607                 memmove(&clusters[m], &clusters[m + 1], k - m - 1);
          608         k--;
          609 printf("k = %lu\n", k);
          610 }
          611 
          612         for (i = 0; i < k; i++)
          613                 clusters[i].n = 0;
          614 
          615         for (i = 0; i < l; i++) {
          616                 d = INFINITY;
          617                 for (j = 0; j < k; j++) {
          618                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
          619                                 assoc[i] = j;
          620                                 d = distances[distanceindex(clusters[j].medoid, i)];
          621                         }
          622                 }
          623                 clusters[assoc[i]].n++;
          624         }
          625 
          626         dsum = 0;
          627         for (i = 0; i < l; i++) {
          628                 d = INFINITY;
          629                 d2 = INFINITY;
          630                 for (j = 0; j < k; j++) {
          631                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
          632                                 d2 = d;
          633                                 d = distances[distanceindex(clusters[j].medoid, i)];
          634                         } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
          635                                 d2 = distances[distanceindex(clusters[j].medoid, i)];
          636                         }
          637                 }
          638                 if (clusters[assoc[i]].medoid != i)
          639                         dsum += 1 - d / (d2 + 0.000000000001);
          640                 printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
          641         }
          642         printf("Silhouette = %lf\n", dsum / (l - k));
          643 
          644         result = k;
          645 cleanup:
          646         free(clusters);
          647         return result;
          648 }
          649 
          650 int
          651 fastermsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
          652 {
          653         size_t i, j, n, ck;
          654         ssize_t m1, m, m2;
          655         size_t ti, th, huh;
          656         double dsum, d, d2, gnarf, tdsum;
          657         struct {
          658                 size_t medoid;
          659                 size_t n;
          660                 double dsum;
          661         } *clusters;
          662         struct {
          663                 size_t n1, n2, n3;
          664                 double d1, d2, d3;
          665         } *cache;
          666         double *dS, *dS2;
          667         double cdS, cdS2;
          668         size_t cm, cx;
          669         ssize_t lx;
          670         int flag, result;
          671         uint32_t r;
          672         double *C;
          673 
          674         if (!l)
          675                 return 0;
          676 
          677         if (l < k)
          678                 k = l;
          679 
          680         clusters = calloc(k, sizeof(*clusters));
          681         if (!clusters)
          682                 return -1;
          683 
          684         result = -1;
          685 
          686 /*
          687         Seed the medoids randomly
          688 */
          689         for (i = n = 0; i < l; i++) {
          690                 if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
          691                         clusters[n++].medoid = i;
          692                 if (n == k)
          693                         break;
          694         }
          695 
          696         cache = calloc(l, sizeof(*cache));
          697         if (!cache)
          698                 goto cleanup;
          699 
          700         dS = calloc(k, sizeof(*dS));
          701         if (!dS)
          702                 goto cleanup;
          703 
          704         dS2 = calloc(k, sizeof(*dS2));
          705         if (!dS2)
          706                 goto cleanup;
          707 
          708         for (i = 0; i < l; i++) {
          709                 cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
          710                 for (j = 0; j < k; j++) {
          711                         d = distances[distanceindex(i, clusters[j].medoid)];
          712                         if (d < cache[i].d1) {
          713                                 cache[i].d3 = cache[i].d2;
          714                                 cache[i].d2 = cache[i].d1;
          715                                 cache[i].d1 = d;
          716                                 cache[i].n3 = cache[i].n2;
          717                                 cache[i].n2 = cache[i].n1;
          718                                 cache[i].n1 = j;
          719                         } else if (d < cache[i].d2) {
          720                                 cache[i].d3 = cache[i].d2;
          721                                 cache[i].d2 = d;
          722                                 cache[i].n3 = cache[i].n2;
          723                                 cache[i].n2 = j;
          724                         } else if (d < cache[i].d3) {
          725                                 cache[i].d3 = d;
          726                                 cache[i].n3 = j;
          727                         }
          728                 }
          729         }
          730 
          731         for (i = 0; i < k; i++) {
          732                 dS[i] = 0;
          733                 for (j = 0; j < l; j++) {
          734                         if (cache[j].n1 != i)
          735                                 continue;
          736                         dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
          737                 }
          738                 for (j = 0; j < l; j++) {
          739                         if (cache[j].n2 != i)
          740                                 continue;
          741                         dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
          742                 }
          743         }
          744 
          745         lx = -1;
          746         for (;;) {
          747                 cdS = 0;
          748                 for (i = 0; i < l; i++) {
          749                         if (i == lx)
          750                                 goto fastermsc_finished;
          751 
          752                         for (j = 0; j < k; j++) {
          753                                 if (clusters[j].medoid == i)
          754                                         goto fastmsc_nexti;
          755                         }
          756 
          757                         memcpy(dS2, dS, k * sizeof(*dS2));
          758                         cdS2 = 0;
          759                         for (j = 0; j < l; j++) {
          760                                 d = distances[distanceindex(i, j)];
          761                                 if (d < cache[j].d1) {
          762                                         cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
          763                                         dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; 
          764                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; 
          765                                 } else if (d < cache[j].d2) {
          766                                         cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
          767                                         dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; 
          768                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; 
          769                                 } else if (d < cache[j].d3) {
          770                                         dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; 
          771                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; 
          772                                 }
          773                         }
          774                         d = -INFINITY;
          775                         for (j = 0; j < k; j++) {
          776                                 if (dS2[j] > d) {
          777                                         d = dS2[j];
          778                                         m = j;
          779                                 }
          780                         }
          781                         dS2[m] += cdS2;
          782                         if (dS2[m] <= 0)
          783                                 continue;
          784 //printf("%lf\t%lf\n", dS2[m], cdS2);
          785 printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i);
          786                         lx = i;
          787                         clusters[m].medoid = i;
          788                         for (j = 0; j < l; j++) {
          789                                 if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m)
          790                                         continue;
          791                                 cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY;
          792                                 for (n = 0; n < k; n++) {
          793                                         d = distances[distanceindex(j, clusters[n].medoid)];
          794                                         if (d < cache[j].d1) {
          795                                                 cache[j].d3 = cache[j].d2;
          796                                                 cache[j].d2 = cache[j].d1;
          797                                                 cache[j].d1 = d;
          798                                                 cache[j].n3 = cache[j].n2;
          799                                                 cache[j].n2 = cache[j].n1;
          800                                                 cache[j].n1 = n;
          801                                         } else if (d < cache[j].d2) {
          802                                                 cache[j].d3 = cache[j].d2;
          803                                                 cache[j].d2 = d;
          804                                                 cache[j].n3 = cache[j].n2;
          805                                                 cache[j].n2 = n;
          806                                         } else if (d < cache[j].d3) {
          807                                                 cache[j].d3 = d;
          808                                                 cache[j].n3 = n;
          809                                         }
          810                                 }
          811                         }
          812                         for (j = 0; j < k; j++) {
          813                                 dS[j] = 0;
          814                                 for (n = 0; n < l; n++) {
          815                                         if (cache[n].n1 != j)
          816                                                 continue;
          817                                         dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3;
          818                                 }
          819                                 for (n = 0; n < l; n++) {
          820                                         if (cache[n].n2 != j)
          821                                                 continue;
          822                                         dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3;
          823                                 }
          824                         }
          825 printf("lx = %lu\n", i);
          826 fastmsc_nexti:;
          827                 }
          828                 if (lx == -1)
          829                         break;
          830         }
          831 fastermsc_finished:;
          832 
          833         for (i = 0; i < k; i++)
          834                 clusters[i].n = 0;
          835 
          836         for (i = 0; i < l; i++) {
          837                 d = INFINITY;
          838                 for (j = 0; j < k; j++) {
          839                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
          840                                 assoc[i] = j;
          841                                 d = distances[distanceindex(clusters[j].medoid, i)];
          842                         }
          843                 }
          844                 clusters[assoc[i]].n++;
          845         }
          846 
          847         dsum = 0;
          848         for (i = 0; i < l; i++) {
          849                 d = INFINITY;
          850                 d2 = INFINITY;
          851                 for (j = 0; j < k; j++) {
          852                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
          853                                 d2 = d;
          854                                 d = distances[distanceindex(clusters[j].medoid, i)];
          855                         } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
          856                                 d2 = distances[distanceindex(clusters[j].medoid, i)];
          857                         }
          858                 }
          859                 if (clusters[assoc[i]].medoid != i)
          860                         dsum += 1 - d / (d2 + 0.000000000001);
          861                 printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
          862         }
          863         printf("Silhouette = %lf\n", dsum / (l - k));
          864 
          865         result = k;
          866 cleanup:
          867         free(clusters);
          868         return result;
          869 }
          870 
          871 int
          872 fastmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
          873 {
          874         size_t i, j, n, ck;
          875         ssize_t m1, m, m2;
          876         size_t ti, th;
          877         double dsum, d, d2, gnarf, tdsum;
          878         struct {
          879                 size_t medoid;
          880                 size_t n;
          881                 double dsum;
          882         } *clusters;
          883         struct {
          884                 size_t n1, n2;
          885                 double d1, d2, d3;
          886         } *cache;
          887         double *dS, *dS2;
          888         double cdS, cdS2;
          889         size_t cm, cx;
          890         int flag, result;
          891         uint32_t r;
          892         double *C;
          893 
          894         if (!l)
          895                 return 0;
          896 
          897         if (l < k)
          898                 k = l;
          899 
          900         clusters = calloc(k, sizeof(*clusters));
          901         if (!clusters)
          902                 return -1;
          903 
          904         result = -1;
          905 
          906 /*
          907         seed the medoids using PAM BUILD
          908 */
          909         d = INFINITY;
          910         m = -1;
          911         for (i = 0; i < l; i++) {
          912                 dsum = 0;
          913                 for (j = 0; j < l; j++)
          914                         dsum += distances[distanceindex(i, j)];
          915                 if (dsum < d) {
          916                         m = i;
          917                         d = dsum;
          918                 }
          919         }
          920         if (m == -1)
          921                 goto cleanup;
          922         ck = 0;
          923         clusters[ck].medoid = m;
          924         ck++;
          925 
          926         C = xmalloc(l * l * sizeof(*C));
          927 
          928         for (; ck < k; ck++) {
          929                 memset(C, 0, l * l * sizeof(*C));
          930                 for (i = 0; i < l; i++) {
          931                         for (j = 0; j < ck; j++) {
          932                                 if (clusters[j].medoid == i)
          933                                         goto build_nexti;
          934                         }
          935                         for (j = 0; j < l; j++) {
          936                                 if (i == j)
          937                                         continue;
          938                                 for (n = 0; n < ck; n++) {
          939                                         if (clusters[n].medoid == j)
          940                                                 goto build_nextj;
          941                                 }
          942                                 d = INFINITY;
          943                                 for (n = 0; n < ck; n++) {
          944                                         if (distances[distanceindex(j, clusters[n].medoid)] < d)
          945                                                 d = distances[distanceindex(j, clusters[n].medoid)];
          946                                 }
          947                                 C[j * l + i] = d - distances[distanceindex(j, i)];
          948                                 if (C[j * l + i] < 0)
          949                                         C[j * l + i] = 0;
          950 build_nextj:;
          951                         }
          952 build_nexti:;
          953                 }
          954                 d = -1;
          955                 m = -1;
          956                 for (i = 0; i < l; i++) {
          957                         for (j = 0; j < ck; j++) {
          958                                 if (clusters[j].medoid == i)
          959                                         goto build_nexti2;
          960                         }
          961                         dsum = 0;
          962                         for (j = 0; j < l; j++)
          963                                 dsum += C[j * l + i];
          964                         if (dsum > d) {
          965                                 d = dsum;
          966                                 m = i;
          967                         }
          968 build_nexti2:;
          969                 }
          970                 clusters[ck].medoid = m;
          971         }
          972         free(C);
          973 
          974 /*
          975         Seed the medoids randomly
          976         for (i = n = 0; i < l; i++) {
          977                 if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
          978                         clusters[n++].medoid = i;
          979                 if (n == k)
          980                         break;
          981         }
          982 */
          983 
          984         cache = calloc(l, sizeof(*cache));
          985         if (!cache)
          986                 goto cleanup;
          987 
          988         dS = calloc(k, sizeof(*dS));
          989         if (!dS)
          990                 goto cleanup;
          991 
          992         dS2 = calloc(k, sizeof(*dS2));
          993         if (!dS2)
          994                 goto cleanup;
          995 
          996         for (;;) {
          997                 for (i = 0; i < l; i++) {
          998                         cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
          999                         for (j = 0; j < k; j++) {
         1000                                 d = distances[distanceindex(i, clusters[j].medoid)];
         1001                                 if (d < cache[i].d1) {
         1002                                         cache[i].d3 = cache[i].d2;
         1003                                         cache[i].d2 = cache[i].d1;
         1004                                         cache[i].d1 = d;
         1005                                         cache[i].n2 = cache[i].n1;
         1006                                         cache[i].n1 = j;
         1007                                 } else if (d < cache[i].d2) {
         1008                                         cache[i].d3 = cache[i].d2;
         1009                                         cache[i].d2 = d;
         1010                                         cache[i].n2 = j;
         1011                                 } else if (d < cache[i].d3) {
         1012                                         cache[i].d3 = d;
         1013                                 }
         1014                         }
         1015                 }
         1016 
         1017                 for (i = 0; i < k; i++) {
         1018                         dS[i] = 0;
         1019                         for (j = 0; j < l; j++) {
         1020                                 if (cache[j].n1 != i)
         1021                                         continue;
         1022                                 dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
         1023                         }
         1024                         for (j = 0; j < l; j++) {
         1025                                 if (cache[j].n2 != i)
         1026                                         continue;
         1027                                 dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
         1028                         }
         1029                 }
         1030 
         1031                 cdS = 0;
         1032                 for (i = 0; i < l; i++) {
         1033                         for (j = 0; j < k; j++) {
         1034                                 if (clusters[j].medoid == i)
         1035                                         goto fastmsc_nexti;
         1036                         }
         1037                         memcpy(dS2, dS, k * sizeof(*dS2));
         1038                         cdS2 = 0;
         1039                         for (j = 0; j < l; j++) {
         1040                                 d = distances[distanceindex(i, j)];
         1041                                 if (d < cache[j].d1) {
         1042                                         cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
         1043                                         dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; 
         1044                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; 
         1045                                 } else if (d < cache[j].d2) {
         1046                                         cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
         1047                                         dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; 
         1048                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; 
         1049                                 } else if (d < cache[j].d3) {
         1050                                         dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; 
         1051                                         dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; 
         1052                                 }
         1053                         }
         1054                         d = -INFINITY;
         1055                         d2 = INFINITY;
         1056                         for (j = 0; j < k; j++) {
         1057                                 if (dS2[j] > d) {
         1058                                         d = dS2[j];
         1059                                         m = j;
         1060                                 }
         1061                                 if (dS2[j] < d2) {
         1062                                         d2 = dS2[j];
         1063                                         m2 = j;
         1064                                 }
         1065                         }
         1066 printf("%lf (%ld)\t%lf (%ld)\n", d, m, d2, m2);
         1067                         dS2[m] += cdS2;
         1068                         if (dS2[m] > cdS) {
         1069                                 cdS = dS2[m];
         1070                                 cm = m;
         1071                                 cx = i;
         1072                         }
         1073 fastmsc_nexti:;
         1074                 }
         1075                 if (cdS <= 0)
         1076                         break;
         1077                 clusters[cm].medoid = cx;
         1078         }
         1079 
         1080         for (i = 0; i < k; i++)
         1081                 clusters[i].n = 0;
         1082 
         1083         for (i = 0; i < l; i++) {
         1084                 d = INFINITY;
         1085                 for (j = 0; j < k; j++) {
         1086                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
         1087                                 assoc[i] = j;
         1088                                 d = distances[distanceindex(clusters[j].medoid, i)];
         1089                         }
         1090                 }
         1091                 clusters[assoc[i]].n++;
         1092         }
         1093 
         1094         dsum = 0;
         1095         for (i = 0; i < l; i++) {
         1096                 d = INFINITY;
         1097                 d2 = INFINITY;
         1098                 for (j = 0; j < k; j++) {
         1099                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
         1100                                 d2 = d;
         1101                                 d = distances[distanceindex(clusters[j].medoid, i)];
         1102                         } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
         1103                                 d2 = distances[distanceindex(clusters[j].medoid, i)];
         1104                         }
         1105                 }
         1106                 if (clusters[assoc[i]].medoid != i)
         1107                         dsum += 1 - d / (d2 + 0.000000000001);
         1108                 printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
         1109         }
         1110         printf("Silhouette = %lf\n", dsum / (l - k));
         1111 
         1112         result = k;
         1113 cleanup:
         1114         free(clusters);
         1115         return result;
         1116 }
         1117 
         1118 /*
         1119         Seeding of medoids based on the PAM BUILD description in https://www.cs.umb.edu/cs738/pam1.pdf
         1120         I was too stupid to understand other descriptions.
         1121 */
         1122 int
         1123 kmedoids(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
         1124 {
         1125         size_t i, j, n, ck;
         1126         ssize_t m1, m;
         1127         size_t ti, th;
         1128         double dsum, d, d2, gnarf, tdsum;
         1129         struct {
         1130                 size_t medoid;
         1131                 size_t n;
         1132                 double dsum;
         1133         } *clusters;
         1134         int flag, result;
         1135         uint32_t r;
         1136         double *C;
         1137 
         1138         if (!l)
         1139                 return 0;
         1140 
         1141         if (l < k)
         1142                 k = l;
         1143 
         1144         clusters = calloc(k, sizeof(*clusters));
         1145         if (!clusters)
         1146                 return -1;
         1147 
         1148         result = -1;
         1149 
         1150         /* seed the medoids using PAM BUILD */
         1151         d = INFINITY;
         1152         m = -1;
         1153         for (i = 0; i < l; i++) {
         1154                 dsum = 0;
         1155                 for (j = 0; j < l; j++)
         1156                         dsum += distances[distanceindex(i, j)];
         1157                 if (dsum < d) {
         1158                         m = i;
         1159                         d = dsum;
         1160                 }
         1161         }
         1162         if (m == -1)
         1163                 goto cleanup;
         1164         ck = 0;
         1165         clusters[ck].medoid = m;
         1166         ck++;
         1167 
         1168         C = xmalloc(l * l * sizeof(*C));
         1169 
         1170         for (; ck < k; ck++) {
         1171                 memset(C, 0, l * l * sizeof(*C));
         1172                 for (i = 0; i < l; i++) {
         1173                         for (j = 0; j < ck; j++) {
         1174                                 if (clusters[j].medoid == i)
         1175                                         goto build_nexti;
         1176                         }
         1177                         for (j = 0; j < l; j++) {
         1178                                 if (i == j)
         1179                                         continue;
         1180                                 for (n = 0; n < ck; n++) {
         1181                                         if (clusters[n].medoid == j)
         1182                                                 goto build_nextj;
         1183                                 }
         1184                                 d = INFINITY;
         1185                                 for (n = 0; n < ck; n++) {
         1186                                         if (distances[distanceindex(j, clusters[n].medoid)] < d)
         1187                                                 d = distances[distanceindex(j, clusters[n].medoid)];
         1188                                 }
         1189                                 C[j * l + i] = d - distances[distanceindex(j, i)];
         1190                                 if (C[j * l + i] < 0)
         1191                                         C[j * l + i] = 0;
         1192 build_nextj:;
         1193                         }
         1194 build_nexti:;
         1195                 }
         1196                 d = -1;
         1197                 m = -1;
         1198                 for (i = 0; i < l; i++) {
         1199                         for (j = 0; j < ck; j++) {
         1200                                 if (clusters[j].medoid == i)
         1201                                         goto build_nexti2;
         1202                         }
         1203                         dsum = 0;
         1204                         for (j = 0; j < l; j++)
         1205                                 dsum += C[j * l + i];
         1206                         if (dsum > d) {
         1207                                 d = dsum;
         1208                                 m = i;
         1209                         }
         1210 build_nexti2:;
         1211                 }
         1212                 clusters[ck].medoid = m;
         1213         }
         1214         free(C);
         1215 
         1216 /*
         1217         Seed the medoids randomly
         1218         for (i = n = 0; i < l; i++) {
         1219                 if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
         1220                         clusters[n++].medoid = i;
         1221                 if (n == k)
         1222                         break;
         1223         }
         1224 */
         1225 
         1226         for (;;) {
         1227                 tdsum = INFINITY;
         1228                 for (i = 0; i < k; i++) {
         1229                         for (j = 0; j < l; j++) {
         1230                                 for (n = 0; n < k; n++) {
         1231                                         if (j == clusters[n].medoid)
         1232                                                 goto swap_nextj;
         1233                                 }
         1234                                 dsum = 0;
         1235                                 for (n = 0; n < l; n++) {
         1236                                         if (n == j)
         1237                                                 continue;
         1238                                         for (m = 0; m < k; m++) {
         1239                                                 if (clusters[m].medoid == n)
         1240                                                         goto swap_nextn;
         1241                                         }
         1242                                         d = INFINITY;
         1243                                         d2 = INFINITY;
         1244                                         for (m = 0; m < k; m++) {
         1245                                                 if (distances[distanceindex(clusters[m].medoid, n)] < d) {
         1246                                                         d2 = d;
         1247                                                         d = distances[distanceindex(clusters[m].medoid, n)];
         1248                                                 } else if (distances[distanceindex(clusters[m].medoid, n)] < d2) {
         1249                                                         d2 = distances[distanceindex(clusters[m].medoid, n)];
         1250                                                 }
         1251                                         }
         1252 /*
         1253 printf("%lf\t%lf\t%lf\t%lf\n", distances[distanceindex(clusters[i].medoid, n)], distances[distanceindex(j, n)], d, d2);
         1254 */
         1255                                         if (distances[distanceindex(clusters[i].medoid, n)] > d) {
         1256                                                 if (distances[distanceindex(j, n)] >= d)
         1257                                                         gnarf = 0;
         1258                                                 else
         1259                                                         gnarf = distances[distanceindex(j, n)] - d;
         1260                                         } else if (distances[distanceindex(clusters[i].medoid, n)] == d) {
         1261                                                 if (distances[distanceindex(j, n)] < d2)
         1262                                                         gnarf = distances[distanceindex(j, n)] - d;
         1263                                                 else
         1264                                                         gnarf = d2 - d;
         1265                                         } else {
         1266                                                 continue;
         1267                                         }
         1268                                         dsum += gnarf;
         1269 swap_nextn:;
         1270                                 }
         1271                                 if (dsum < tdsum) {
         1272                                         ti = i;
         1273                                         th = j;
         1274                                         tdsum = dsum;
         1275 /*
         1276 printf("%lu\t%lu\t%lf\n", clusters[ti].medoid, th, tdsum);
         1277 */
         1278                                 }
         1279 swap_nextj:;
         1280                         }
         1281                 }
         1282                 if (tdsum >= 0)
         1283                         break;
         1284                 clusters[ti].medoid = th;
         1285         }
         1286 
         1287         for (i = 0; i < k; i++)
         1288                 clusters[i].n = 0;
         1289 
         1290         for (i = 0; i < l; i++) {
         1291                 d = INFINITY;
         1292                 for (j = 0; j < k; j++) {
         1293                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
         1294                                 assoc[i] = j;
         1295                                 d = distances[distanceindex(clusters[j].medoid, i)];
         1296                         }
         1297                 }
         1298                 clusters[assoc[i]].n++;
         1299         }
         1300 
         1301         dsum = 0;
         1302         for (i = 0; i < l; i++) {
         1303                 d = INFINITY;
         1304                 d2 = INFINITY;
         1305                 for (j = 0; j < k; j++) {
         1306                         if (distances[distanceindex(clusters[j].medoid, i)] < d) {
         1307                                 d2 = d;
         1308                                 d = distances[distanceindex(clusters[j].medoid, i)];
         1309                         } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
         1310                                 d2 = distances[distanceindex(clusters[j].medoid, i)];
         1311                         }
         1312                 }
         1313                 if (clusters[assoc[i]].medoid != i)
         1314                         dsum += 1 - d / (d2 + 0.000000000001);
         1315                 printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
         1316                 if (clusters[assoc[i]].medoid == i)
         1317                         assoc[i] |= 128;
         1318         }
         1319         printf("Silhouette = %lf\n", dsum / (l - k));
         1320 
         1321 /*
         1322         while (1) {
         1323                 flag = 0;
         1324                 for (i = 0; i < l; i++) {
         1325                         d = INFINITY;
         1326                         m = assoc[i];
         1327                         for (j = 0; j < k; j++) {
         1328                                 if (distances[distanceindex(clusters[j].medoid, i)] < d) {
         1329                                         d = distances[distanceindex(clusters[j].medoid, i)];
         1330                                         assoc[i] = j;
         1331                                 }
         1332                         }
         1333                         if (assoc[i] != m)
         1334                                 flag = 1;
         1335                 }
         1336                 if (!flag)
         1337                         break;
         1338 
         1339                 for (i = 0; i < k; i++)
         1340                         clusters[i].dsum = INFINITY;
         1341 
         1342                 for (i = 0; i < l; i++) {
         1343                         dsum = 0;
         1344                         for (j = 0; j < l; j++) {
         1345                                 if (assoc[i] != assoc[j])
         1346                                         continue;
         1347                                 dsum += distanceindex(i, j);
         1348                         }
         1349                         if (dsum < clusters[assoc[i]].dsum) {
         1350                                 clusters[assoc[i]].dsum = dsum;
         1351                                 clusters[assoc[i]].medoid = i;
         1352                         }
         1353                 }
         1354         }
         1355 */
         1356         result = k;
         1357 cleanup:
         1358         free(clusters);
         1359         return result;
         1360 }
         1361 
         1362 struct room {
         1363         struct room *p;
         1364         void *d;
         1365         size_t x, y;
         1366         size_t w, h;
         1367 };
         1368 
         1369 int
         1370 generaterooms_collides(size_t x1, size_t y1, size_t w1, size_t h1,
         1371                        size_t x2, size_t y2, size_t w2, size_t h2,
         1372                        size_t margin)
         1373 {
         1374         return !((y1 >= y2 + h2 + margin || y2 >= y1 + h1 + margin) || (x1 >= x2 + w2 + margin || x2 >= x1 + w1 + margin));
         1375 }
         1376 
         1377 int
         1378 generaterooms_isfree(struct room *rs, size_t l, size_t x, size_t y, size_t w, size_t h, size_t margin)
         1379 {
         1380         size_t i;
         1381 
         1382         for (i = 0; i < l; i++) {
         1383                 if (generaterooms_collides(rs[i].x, rs[i].y, rs[i].w, rs[i].h,
         1384                                            x, y, w, h, margin))
         1385                         return 0;
         1386         }
         1387         return 1;
         1388 }
         1389 
         1390 struct rect {
         1391         struct rect *next, *next2;
         1392         struct room *room;
         1393         size_t x1, y1;
         1394         size_t x2, y2;
         1395         size_t d;
         1396 };
         1397 
         1398 struct rect *
         1399 generaterooms_gnarf_randomneighbor(struct rect *x, struct rect *rs, uint32_t *prng)
         1400 {
         1401         struct rect *r, *result;
         1402         size_t n;
         1403 
         1404         n = 0;
         1405         result = NULL;
         1406         for (r = rs; r; r = r->next) {
         1407                 if (r->y2 < x->y1 || r->y1 > x->y2 || r->x2 < x->x1 || r->x1 > x->x2)
         1408                         continue;
         1409                 if ((r->y2 == x->y1 || r->y1 == x->y2) && (r->x2 == x->x1 || r->x1 == x->x2))
         1410                         continue;
         1411                 n++;
         1412                 if (xorshift(prng) / (1. + UINT32_MAX) < 1. / n)
         1413                         result = r;
         1414         }
         1415 
         1416         return result;
         1417 }
         1418 
         1419 #define ROOM_HEIGHT_MIN 3
         1420 #define ROOM_WIDTH_MIN 5
         1421 #define ROOM_MARGIN_MIN 1
         1422 #define CELL_HEIGHT_MIN (ROOM_HEIGHT_MIN + ROOM_MARGIN_MIN + 3)
         1423 #define CELL_WIDTH_MIN (ROOM_WIDTH_MIN + ROOM_MARGIN_MIN + 3)
         1424 size_t
         1425 generaterooms_gnarf(char *host, char *port, char *selector, struct room *rs, size_t l)
         1426 {
         1427         struct rect *queuehead, *queuetail;
         1428         struct rect *r, *t;
         1429         struct rect *rects, *walk;
         1430         size_t w, h, i, j, rl, n;
         1431         uint32_t prng;
         1432         int vertical;
         1433         struct room *room;
         1434 
         1435         prng = fnv1a(4, host, port, selector, "seed");
         1436 
         1437         r = xmalloc(sizeof(*r));
         1438         r->x1 = r->y1 = ROOM_MARGIN_MIN;
         1439         r->x2 = MAPWIDTH;
         1440         r->y2 = MAPHEIGHT;
         1441         r->d = 0;
         1442 
         1443         queuetail = r;
         1444         queuetail->next = NULL;
         1445         queuehead = r;
         1446 
         1447         rects = NULL;
         1448         rl = 0;
         1449 
         1450         while (queuehead) {
         1451                 r = queuehead;
         1452                 if (queuetail == queuehead)
         1453                         queuetail = NULL;
         1454                 queuehead = queuehead->next;
         1455 
         1456 /*
         1457                 if (r->d >= 8) {
         1458                         r->next = rects;
         1459                         rects = r;
         1460                         rl++;
         1461                         continue;
         1462                 }
         1463 */
         1464 
         1465                 if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2 && r->y2 - r->y1 >= CELL_HEIGHT_MIN * 2) {
         1466                         vertical = xorshift(&prng) & 1;
         1467                 } else if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2) {
         1468                         vertical = 0;
         1469                 } else if (r->y2 - r->y1 >= CELL_HEIGHT_MIN * 2) {
         1470                         vertical = 1;
         1471                 } else {
         1472                         r->next = rects;
         1473                         rects = r;
         1474                         rl++;
         1475                         continue;
         1476                 }
         1477 
         1478                 if (vertical) {
         1479                         w = r->x2 - r->x1;
         1480                         h = CELL_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r->y1 - CELL_HEIGHT_MIN * 2);
         1481                 } else {
         1482                         w = CELL_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r->x1 - CELL_WIDTH_MIN * 2);
         1483                         h = r->y2 - r->y1;
         1484                 }
         1485 
         1486                 t = xmalloc(sizeof(*t));
         1487                 t->x1 = r->x1;
         1488                 t->y1 = r->y1;
         1489                 t->x2 = r->x1 + w;
         1490                 t->y2 = r->y1 + h;
         1491                 t->d = r->d + 1;
         1492                 t->next = NULL;
         1493                 t->room = NULL;
         1494 
         1495                 if (!queuetail) {
         1496                         queuehead = t;
         1497                         queuetail = t;
         1498                 } else {
         1499                         queuetail->next = t;
         1500                         queuetail = t;
         1501                 }
         1502 
         1503                 t = xmalloc(sizeof(*t));
         1504                 if (vertical) {
         1505                         t->x1 = r->x1;
         1506                         t->y1 = r->y1 + h;
         1507                 } else {
         1508                         t->x1 = r->x1 + w;
         1509                         t->y1 = r->y1;
         1510                 }
         1511                 t->x2 = r->x2;
         1512                 t->y2 = r->y2;
         1513                 t->d = r->d + 1;
         1514                 t->next = NULL;
         1515                 t->room = NULL;
         1516 
         1517                 queuetail->next = t;
         1518                 queuetail = t;
         1519 
         1520                 free(r);
         1521         }
         1522 
         1523         if (l > rl)
         1524                 l = rl;
         1525 
         1526         for (r = rects; r; r = r->next) {
         1527                 if (MAPHEIGHT / 2 >= r->y1 && MAPHEIGHT / 2 < r->y2 &&
         1528                     MAPWIDTH / 2 >= r->x1 && MAPWIDTH / 2 < r->x2)
         1529                         break;
         1530         }
         1531         
         1532         i = 0;
         1533         rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM_MARGIN_MIN - ROOM_WIDTH_MIN);
         1534         rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r->y1 - ROOM_MARGIN_MIN - ROOM_HEIGHT_MIN);
         1535         rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM_MARGIN_MIN - rs[i].w);
         1536         rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - ROOM_MARGIN_MIN - rs[i].h);
         1537         rs[i].p = NULL;
         1538         r->room = &rs[i];
         1539 
         1540         walk = r;
         1541         walk->next2 = NULL;
         1542 
         1543         i++;
         1544         for (; i < l;) {
         1545                 t = generaterooms_gnarf_randomneighbor(r, rects, &prng);
         1546                 if (!t || t->room) {
         1547                         n = 0;
         1548                         for (t = walk; t; t = t->next2) {
         1549                                 n++;
         1550                                 if (xorshift(&prng) / (1. + UINT32_MAX) < 1. / n)
         1551                                         r = t;
         1552                                 
         1553                         }
         1554                         continue;
         1555                 }
         1556                 rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + t->x2 - t->x1 - ROOM_MARGIN_MIN - ROOM_WIDTH_MIN);
         1557                 rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + t->y2 - t->y1 - ROOM_MARGIN_MIN - ROOM_HEIGHT_MIN);
         1558                 rs[i].x = t->x1 + xorshift(&prng) % (1 + t->x2 - t->x1 - ROOM_MARGIN_MIN - rs[i].w);
         1559                 rs[i].y = t->y1 + xorshift(&prng) % (1 + t->y2 - t->y1 - ROOM_MARGIN_MIN - rs[i].h);
         1560                 rs[i].p = r->room;
         1561                 t->room = &rs[i];
         1562                 i++;
         1563                 r = t;
         1564                 r->next2 = walk;
         1565                 walk = r;
         1566         }
         1567 
         1568 /*
         1569         for (r = rects, i = 0; i < l; i++, r = r->next) {
         1570                 rs[i].w = 3 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - 3);
         1571                 rs[i].h = 2 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - 2);
         1572                 rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - rs[i].w);
         1573                 rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - rs[i].h);
         1574                 rs[i].p = NULL;
         1575         }
         1576 */
         1577 
         1578         return l;
         1579 }
         1580 
         1581 #define ROOMS_MARGIN_MIN 1
         1582 #define ROOMS_MARGIN_MAX 3
         1583 #define MAP_MARGIN 1
         1584 size_t
         1585 generaterooms_stupid(char *host, char *port, char *selector, struct room *rs, size_t l)
         1586 {
         1587         size_t i, j, ri, m, n;
         1588         ssize_t y, x;
         1589         ssize_t w, h;
         1590         char buffer[3];
         1591         struct room *r;
         1592         uint32_t prng;
         1593 
         1594         rs[0].h = 4 + fnv1a(4, host, port, selector, "h") % 5;
         1595         rs[0].w = 4 + fnv1a(4, host, port, selector, "w") % 5;
         1596         rs[0].y = MAPHEIGHT / 2 - rs[0].h / 2;
         1597         rs[0].x = MAPWIDTH / 2 - rs[0].w / 2;
         1598         for (i = 1; i < l; i++) {
         1599                 snprintf(buffer, sizeof(buffer), "%u", i);
         1600                 prng = fnv1a(5, host, port, selector, buffer, "seed");
         1601                 n = 0;
         1602                 do {
         1603                         do {
         1604                                 switch (xorshift(&prng) % 3) {
         1605                                 case 0:
         1606                                         h = 3 + xorshift(&prng) % 3;
         1607                                         w = 3 + xorshift(&prng) % 2;
         1608                                         break;
         1609                                 case 1:
         1610                                         w = 3 + xorshift(&prng) % 3;
         1611                                         h = 4 + xorshift(&prng) % 2;
         1612                                         break;
         1613                                 case 2:
         1614                                         h = w = 4 + xorshift(&prng) % 5;
         1615                                         break;
         1616                                 }
         1617                                 m = ROOMS_MARGIN_MIN + xorshift(&prng) % (1 + ROOMS_MARGIN_MAX - ROOMS_MARGIN_MIN);
         1618                                 j = xorshift(&prng) % i;
         1619                                 switch (xorshift(&prng) % 4) {
         1620                                 case 0:
         1621                                         x = rs[j].x;
         1622                                         if (w < rs[j].w)
         1623                                                 x += xorshift(&prng) % (rs[j].w - w + 1);
         1624                                         else
         1625                                                 x -= xorshift(&prng) % (w - rs[j].w + 1);
         1626                                         y = rs[j].y - h - m;
         1627                                         break;
         1628                                 case 1:
         1629                                         x = rs[j].x + rs[j].w + m;
         1630                                         y = rs[j].y;
         1631                                         if (h < rs[j].h)
         1632                                                 y += xorshift(&prng) % (rs[j].h - h + 1);
         1633                                         else
         1634                                                 y -= xorshift(&prng) % (h - rs[j].h + 1);
         1635                                         break;
         1636                                 case 2:
         1637                                         x = rs[j].x;
         1638                                         if (w < rs[j].w)
         1639                                                 x += xorshift(&prng) % (rs[j].w - w + 1);
         1640                                         else
         1641                                                 x -= xorshift(&prng) % (w - rs[j].w + 1);
         1642                                         y = rs[j].y + rs[j].h + m;
         1643                                         break;
         1644                                 case 3:
         1645                                         x = rs[j].x - w - m;
         1646                                         y = rs[j].y;
         1647                                         if (h < rs[j].h)
         1648                                                 y += xorshift(&prng) % (rs[j].h - h + 1);
         1649                                         else
         1650                                                 y -= xorshift(&prng) % (h - rs[j].h + 1);
         1651                                         break;
         1652                                 }
         1653                         } while (x < MAP_MARGIN || x + w + MAP_MARGIN > MAPWIDTH || y < MAP_MARGIN || y + h + MAP_MARGIN > MAPHEIGHT);
         1654                         n++;
         1655                 } while (n <= 100 && !generaterooms_isfree(rs, i, x, y, w, h, ROOMS_MARGIN_MIN));
         1656                 if (n > 100)
         1657                         break;
         1658                 rs[i].h = h;
         1659                 rs[i].w = w;
         1660                 rs[i].x = x;
         1661                 rs[i].y = y;
         1662                 rs[i].p = &rs[j];
         1663         }
         1664         return i;
         1665 }
         1666 
         1667 #define ROOMS_GRID_ROWS 5
         1668 #define ROOMS_GRID_COLS 10
         1669 size_t
         1670 generaterooms_grid(char *host, char *port, char *selector, struct room *rs, size_t l)
         1671 {
         1672         size_t i, j, k, x, y;
         1673         unsigned char cells[ROOMS_GRID_ROWS * ROOMS_GRID_COLS];
         1674         char buffer[3];
         1675         uint32_t prng;
         1676 
         1677         memset(cells, 0, sizeof(cells));
         1678 
         1679         if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS)
         1680                 l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS;
         1681 
         1682         for (i = 0; i < l; i++) {
         1683                 snprintf(buffer, sizeof(buffer), "%u", i);
         1684                 prng = fnv1a(5, host, port, selector, buffer, "seed");
         1685                 j = 0;
         1686                 do {
         1687                         k = xorshift(&prng) % (ROOMS_GRID_ROWS * ROOMS_GRID_COLS);
         1688                         j++;
         1689                 } while (j <= 100 && cells[k]);
         1690                 if (j > 100)
         1691                         break;
         1692                 y = k / ROOMS_GRID_COLS;
         1693                 x = k % ROOMS_GRID_COLS;
         1694                 cells[k] = 1;
         1695                 rs[i].w = 3 + xorshift(&prng) % (1 + 8 - 2 - 3);
         1696                 rs[i].h = 3 + xorshift(&prng) % (1 + 5 - 2 - 3);
         1697                 rs[i].x = x * 8 + 1 + xorshift(&prng) % (1 + 8 - 2 - rs[i].w);
         1698                 rs[i].y = y * 5 + 1 + xorshift(&prng) % (1 + 5 - 2 - rs[i].h);
         1699 /*
         1700 printf("%u\t%u\t%u\t%u\n", rs[i].w, rs[i].h, rs[i].x, rs[i].y);
         1701 */
         1702                 rs[i].p = NULL;
         1703         }
         1704 
         1705         return i;
         1706 }
         1707 
         1708 void
         1709 generaterooms_grid2_(struct room *r, uint32_t *prng, size_t x, size_t y)
         1710 {
         1711         r->w = 3 + xorshift(prng) % (1 + 8 - 2 - 3);
         1712         r->h = 3 + xorshift(prng) % (1 + 5 - 2 - 3);
         1713         r->x = x * 8 + 1 + xorshift(prng) % (1 + 8 - 2 - r->w);
         1714         r->y = y * 5 + 1 + xorshift(prng) % (1 + 5 - 2 - r->h);
         1715         r->p = NULL;
         1716 /*
         1717 printf("%u\t%u\t%u\t%u\n", r->w, r->h, r->x, r->y);
         1718 */
         1719 }
         1720 
         1721 size_t
         1722 generaterooms_grid2(char *host, char *port, char *selector, struct room *rs, size_t l)
         1723 {
         1724         const struct {
         1725                 ssize_t y, x;
         1726         } gnarf[8] = {
         1727                 { -1,  0 },
         1728                 {  0,  1 },
         1729                 {  1,  0 },
         1730                 {  0, -1 },
         1731                 { -1,  1 },
         1732                 {  1,  1 },
         1733                 {  1, -1 },
         1734                 { -1, -1 }
         1735         };
         1736         size_t i, j, k, m, n;
         1737         ssize_t x, y;
         1738         struct room *cells[ROOMS_GRID_ROWS][ROOMS_GRID_COLS];
         1739         char buffer[3];
         1740         uint32_t prng;
         1741         struct room *r;
         1742 
         1743         memset(cells, 0, sizeof(cells));
         1744 
         1745         if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS)
         1746                 l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS;
         1747 
         1748         prng = fnv1a(5, host, port, selector, "seed");
         1749         x = ROOMS_GRID_COLS / 2;
         1750         y = ROOMS_GRID_ROWS / 2;
         1751 
         1752         r = &rs[0];
         1753         generaterooms_grid2_(r, &prng, x, y);
         1754         cells[y][x] = r;
         1755 
         1756         for (i = 1; i < l; i++) {
         1757                 for (n = 0; n < 100; n++) {
         1758                         m = xorshift(&prng) % 4;
         1759                         x += gnarf[m].x;
         1760                         y += gnarf[m].y;
         1761                         if (x < 0 || x >= ROOMS_GRID_COLS || y < 0 || y >= ROOMS_GRID_ROWS || cells[y][x]) {
         1762                                 m = xorshift(&prng) % i;
         1763                                 r = &rs[m];
         1764                                 x = r->x / 8;
         1765                                 y = r->y / 5;
         1766                         } else {
         1767                                 generaterooms_grid2_(&rs[i], &prng, x, y);
         1768                                 rs[i].p = r;
         1769                                 cells[y][x] = r = &rs[i];
         1770                                 break;
         1771                         }
         1772                 }
         1773                 if (n >= 100)
         1774                         break;
         1775         }
         1776 
         1777         return i;
         1778 }
         1779 
         1780 size_t
         1781 distance(size_t x1, size_t y1, size_t x2, size_t y2)
         1782 {
         1783         size_t d;
         1784 
         1785         if (y1 < y2)
         1786                 d = y2 - y1;
         1787         else
         1788                 d = y1 - y2;
         1789         if (x1 < x2)
         1790                 d += x2 - x1;
         1791         else
         1792                 d += x1 - x2;
         1793 
         1794         return d;
         1795 }
         1796 
         1797 void
         1798 nearestpoints(struct room *a, struct room *b, size_t *ax, size_t *ay, size_t *bx, size_t *by)
         1799 {
         1800         if (a->y >= b->y && a->y < b->y + b->h) {
         1801                 *ay = *by = a->y;
         1802         } else if (b->y >= a->y && b->y < a->y + a->h) {
         1803                 *ay = *by = b->y;
         1804         } else if (a->y >= b->y) {
         1805                 *ay = a->y;
         1806                 *by = b->y + b->h - 1;
         1807         } else if (b->y >= a->y) {
         1808                 *ay = a->y + a->h - 1;
         1809                 *by = b->y;
         1810         }
         1811 
         1812         if (a->x >= b->x && a->x < b->x + b->w) {
         1813                 *ax = *bx = a->x;
         1814         } else if (b->x >= a->x && b->x < a->x + a->w) {
         1815                 *ax = *bx = b->x;
         1816         } else if (a->x >= b->x) {
         1817                 *ax = a->x;
         1818                 *bx = b->x + b->w - 1;
         1819         } else if (b->x >= a->x) {
         1820                 *ax = a->x + a->w - 1;
         1821                 *bx = b->x;
         1822         }
         1823 }
         1824 
         1825 size_t
         1826 roomdistance(struct room *a, struct room *b)
         1827 {
         1828         size_t x1, y1;
         1829         size_t x2, y2;
         1830 
         1831         nearestpoints(a, b, &x1, &y1, &x2, &y2);
         1832                 
         1833         return distance(x1, y1, x2, y2);
         1834 }
         1835 
         1836 struct room *
         1837 nearestroom(struct room *rs, size_t l, struct room *x, uint32_t prng)
         1838 {
         1839         size_t i, d, cd;
         1840         ssize_t j;
         1841         double n;
         1842 
         1843         j = -1;
         1844         d = UINT32_MAX;
         1845         for (i = 0; i < l; i++) {
         1846                 if (&rs[i] == x)
         1847                         continue;
         1848                 if ((cd = roomdistance(&rs[i], x)) < d) {
         1849                         j = i;
         1850                         d = cd;
         1851                         n = 1;
         1852                 } else if (cd == d) {
         1853                         n++;
         1854                         if (xorshift(&prng) / (UINT32_MAX + 1.) < 1. / n)
         1855                                 j = i;
         1856                 }
         1857         }
         1858 
         1859         if (j == -1)
         1860                 return NULL;
         1861 
         1862         return &rs[j];
         1863 }
         1864 
         1865 void
         1866 connectrooms(struct room *a, struct room *b)
         1867 {
         1868         size_t i, j;
         1869         ssize_t ii;
         1870         size_t x1, y1;
         1871         size_t x2, y2;
         1872 
         1873         nearestpoints(a, b, &x1, &y1, &x2, &y2);
         1874 
         1875         if (y1 > y2) {
         1876                 ii = -1;
         1877         } else if (y2 > y1) {
         1878                 ii = 1;
         1879         } else {
         1880                 ii = 0;
         1881         }
         1882 
         1883 /*
         1884 printf("%lu\t%lu\t%d\n", y1, y2, ii);
         1885 */
         1886         for (i = y1; i != y2; i += ii)
         1887                 map[i][x1].c = '.';
         1888 
         1889         if (x1 > x2) {
         1890                 ii = -1;
         1891         } else if (x2 > x1) {
         1892                 ii = 1;
         1893         } else {
         1894                 ii = 0;
         1895         }
         1896 
         1897         for (i = x1; i != x2; i += ii)
         1898                 map[y2][i].c = '.';
         1899 }
         1900 
         1901 void
         1902 rendermap(void)
         1903 {
         1904         size_t i, j;
         1905 
         1906         for (i = 0; i < MAPHEIGHT; i++) {
         1907                 for (j = 0; j < MAPWIDTH; j++)
         1908                         putchar(map[i][j].c);
         1909                 putchar('\n');
         1910         }
         1911 }
         1912 
         1913 size_t
         1914 diff(size_t a, size_t b)
         1915 {
         1916         if (a < b)
         1917                 return b - a;
         1918         return a - b;
         1919 }
         1920 
         1921 /*
         1922         https://en.wikipedia.org/wiki/Levenshtein_distance#Recursive
         1923 */
         1924 size_t
         1925 levenshteindistance_slow(char *a, char *b)
         1926 {
         1927         size_t k, l, m;
         1928 
         1929         if (!*a)
         1930                 return strlen(b);
         1931         else if (!*b)
         1932                 return strlen(a);
         1933         else if (*a == *b)
         1934                 return levenshteindistance_slow(a + 1, b + 1);
         1935 
         1936         k = levenshteindistance_slow(a + 1, b);
         1937         l = levenshteindistance_slow(a, b + 1);
         1938         m = levenshteindistance_slow(a + 1, b + 1);
         1939 
         1940         if (k < l && k < m)
         1941                 return 1 + k;
         1942         else if (l < k && l < m)
         1943                 return 1 + l;
         1944         else
         1945                 return 1 + m;
         1946 }
         1947 
         1948 ssize_t
         1949 placeitems_kmedoid(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k)
         1950 {
         1951         size_t i, j;
         1952         ssize_t r;
         1953         double *distances, d;
         1954         double *mdists;
         1955         struct {
         1956                 double selector;
         1957                 double segment;
         1958                 double index;
         1959         } *distanceparts, maxdistanceparts;
         1960         uint32_t prng;
         1961         struct point *points;
         1962 
         1963         distanceparts = xmalloc(triangularnumber(l) * sizeof(*distanceparts));
         1964 
         1965         memset(&maxdistanceparts, 0, sizeof(maxdistanceparts));
         1966         for (i = 0; i < l; i++) {
         1967                 memset(&distanceparts[distanceindex(i, i)], 0, sizeof(*distanceparts));
         1968                 for (j = 0; j < i; j++) {
         1969                         distanceparts[distanceindex(i, j)].selector = levenshteindistance(g[i].fields[1], g[j].fields[1]) + levenshteindistance(g[i].fields[2], g[j].fields[2]) + levenshteindistance(g[i].fields[3], g[j].fields[3]);
         1970                         distanceparts[distanceindex(i, j)].segment = diff(g[i].s, g[j].s);
         1971                         distanceparts[distanceindex(i, j)].index = diff(g[i].i, g[j].i);
         1972 
         1973                         if (distanceparts[distanceindex(i, j)].selector > maxdistanceparts.selector)
         1974                                 maxdistanceparts.selector = distanceparts[distanceindex(i, j)].selector;
         1975                         if (distanceparts[distanceindex(i, j)].segment > maxdistanceparts.segment)
         1976                                 maxdistanceparts.segment = distanceparts[distanceindex(i, j)].segment;
         1977                         if (distanceparts[distanceindex(i, j)].index > maxdistanceparts.index)
         1978                                 maxdistanceparts.index = distanceparts[distanceindex(i, j)].index;
         1979                 }
         1980         }
         1981         for (i = 0; i < l; i++) {
         1982                 for (j = 0; j < i; j++) {
         1983                         distanceparts[distanceindex(i, j)].selector /= maxdistanceparts.selector;
         1984                         distanceparts[distanceindex(i, j)].segment /= maxdistanceparts.segment;
         1985                         distanceparts[distanceindex(i, j)].index /= maxdistanceparts.index;
         1986                 }
         1987         }
         1988 
         1989         distances = xmalloc(triangularnumber(l) * sizeof(*distances));
         1990         prng = fnv1a(4, host, port, selector, "wiggle");
         1991         for (i = 0; i < l; i++) {
         1992                 distances[distanceindex(i, i)] = 0;
         1993                 for (j = 0; j < i; j++)
         1994                         distances[distanceindex(i, j)] = + distanceparts[distanceindex(i, j)].selector;
         1995         }
         1996         free(distanceparts);
         1997 
         1998         r = kmedoids(g, l, k, distances, assocs, fnv1a(4, host, port, selector, "seed"));
         1999 
         2000         points = calloc(l, sizeof(*points));
         2001         sammon(distances, l, fnv1a(4, host, port, selector, "sammonseed"), points);
         2002 
         2003         for (i = 0; i < l; i++)
         2004                 printf("%lf\t%lf\t%ld\t%lu\t%s\n", points[i].dims[0], points[i].dims[1], assocs[i] & 127, assocs[i] >> 7, g[i].fields[1]);
         2005 
         2006         free(points);
         2007         free(distances);
         2008 
         2009         return r;
         2010 }
         2011 
         2012 size_t
         2013 placeitems_hash(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k)
         2014 {
         2015         size_t i;
         2016 
         2017         for (i = 0; i < l; i++)
         2018                 assocs[i] = fnv1a(6, host, port, selector, g[i].fields[1], g[i].fields[2], g[i].fields[3]) % k;
         2019 
         2020         return k;
         2021 }
         2022 
         2023 size_t py, px;
         2024 
         2025 enum {
         2026         Portal,
         2027         StaircaseDown,
         2028         Bookshelf
         2029 };
         2030 
         2031 void
         2032 generatemap(char *host, char *port, char *selector, struct gopherentry *g, size_t l)
         2033 {
         2034         size_t i, j, k;
         2035         size_t x, y;
         2036         ssize_t n, m;
         2037         size_t *cassocs;
         2038         struct room *rooms, *r;
         2039         struct {
         2040                 unsigned char x, y;
         2041         } positions[3];
         2042         uint32_t prng;
         2043         char buffer[3];
         2044 
         2045         memset(map, 0, sizeof(map));
         2046 
         2047         for (i = 0; i < MAPHEIGHT; i++) {
         2048                 for (j = 0; j < MAPWIDTH; j++) {
         2049                         map[i][j].c = '#';
         2050                 }
         2051         }
         2052         k = 7;
         2053         rooms = calloc(k, sizeof(*rooms));
         2054         if (!rooms)
         2055                 return;
         2056         k = generaterooms_gnarf(host, port, selector, rooms, k);
         2057 
         2058         cassocs = calloc(l, sizeof(*cassocs));
         2059         if (!cassocs)
         2060                 goto cleanup;
         2061 
         2062         k = placeitems_kmedoid(host, port, selector, g, l, cassocs, k);
         2063 
         2064         /* Insert rooms */
         2065         for (i = 0; i < k; i++) {
         2066                 for (y = rooms[i].y; y < rooms[i].y + rooms[i].h; y++) {
         2067                         for (x = rooms[i].x; x < rooms[i].x + rooms[i].w; x++)
         2068                                 map[y][x].c = '.';
         2069                 } 
         2070         }
         2071 
         2072         /* Insert connections */
         2073         for (i = 0; i < k; i++) {
         2074                 if (rooms[i].p)
         2075                         connectrooms(&rooms[i], rooms[i].p);
         2076 /*
         2077                 if ((r = nearestroom(rooms, i, &rooms[i], fnv1a(4, host, port, selector, "gnarf"))) && r != rooms[i].p && r->p != &rooms[i])
         2078                         connectrooms(&rooms[i], r);
         2079 */
         2080         }
         2081 
         2082         /* Insert items */
         2083         for (i = 0; i < k; i++) {
         2084                 snprintf(buffer, sizeof(buffer), "%d", i);
         2085                 prng = fnv1a(4, host, port, selector, buffer);
         2086                 for (j = 0; j < 3; j++) {
         2087                         /* Ugly brute force strategy... I'm sure there is a math-way of doing this. */
         2088                         do {
         2089                                 positions[j].x = rooms[i].x + xorshift(&prng) % rooms[i].w;
         2090                                 positions[j].y = rooms[i].y + xorshift(&prng) % rooms[i].h;
         2091                                 for (m = 0; m < j && positions[m].x != positions[j].x || 
         2092                                                      positions[m].y != positions[j].y; m++);
         2093                         } while (m < j);
         2094                 }
         2095                 for (j = 0; j < l; j++) {
         2096                         if (cassocs[j] != i)
         2097                                 continue;
         2098 /*
         2099                         printf("%s\t%lu\n", g[j].fields[1], cassocs[j]);
         2100 */
         2101                         switch (g[j].type) {
         2102                         case '0':
         2103                                 x = positions[Bookshelf].x;
         2104                                 y = positions[Bookshelf].y;
         2105                                 map[y][x].c = 'E';
         2106                                 break;
         2107                         case '1':
         2108                                 if (strcmp(g[j].fields[2], host) || strcmp(g[j].fields[3], port)) {
         2109                                         x = positions[Portal].x;
         2110                                         y = positions[Portal].y;
         2111                                         map[y][x].c = 'O';
         2112                                 } else {
         2113                                         x = positions[StaircaseDown].x;
         2114                                         y = positions[StaircaseDown].y;
         2115                                         if (map[y][x].data)
         2116                                                 map[y][x].c = 'L';
         2117                                         else
         2118                                                 map[y][x].c = '>';
         2119                                 }
         2120                                 break;
         2121                         }
         2122                         g[j].next = map[y][x].data;
         2123                         map[y][x].data = &g[j];
         2124                 }
         2125         }
         2126 
         2127         free(cassocs);
         2128 cleanup:
         2129         free(rooms);
         2130 }
         2131 
         2132 char gopherbuffer[16 * 1024 * 1024];
         2133 
         2134 void
         2135 interact(void)
         2136 {
         2137 }
         2138 
         2139 void
         2140 loop(void)
         2141 {
         2142         char c;
         2143 
         2144         do {
         2145                 switch (c) {
         2146                 case 'h':
         2147                         break;
         2148                 case 'j':
         2149                         break;
         2150                 case 'k':
         2151                         break;
         2152                 case 'l':
         2153                         break;
         2154                 case ' ':
         2155                         interact();
         2156                         break;
         2157                 }
         2158         } while (1);
         2159 }
         2160 
         2161 int
         2162 main(int argc, char *argv[])
         2163 {
         2164         ssize_t l;
         2165         struct gopherentry *g;
         2166         char *host, *port, *selector;
         2167         struct termios termios, ntermios;
         2168 
         2169         if (argc < 2) {
         2170                 printf("%s host [port [selector]]\n", argv[0]);
         2171                 return 1;
         2172         }
         2173 
         2174 /*
         2175         memset(&termios, 0, sizeof(termios));
         2176         if (tcgetattr(0, &termios) == -1)
         2177                 return 1;
         2178 
         2179         memcpy(&ntermios, &termios, sizeof(termios));
         2180         ntermios.c_lflag &= ~(ICANON | ECHO);
         2181         ntermios.c_cc[VMIN] = 1;
         2182         ntermios.c_cc[VTIME] = 0;
         2183 
         2184         if (tcsetattr(0, TCSANOW, &ntermios) == -1)
         2185                 return 1;
         2186 */
         2187 
         2188         host = argv[1];
         2189         port = (argc > 2) ? argv[2] : "70";
         2190         selector = (argc > 3) ? argv[3] : "";
         2191 
         2192         do {
         2193                 l = gopher_request(host, port, selector, gopherbuffer, sizeof(gopherbuffer));
         2194                 if (l == -1)
         2195                         return 1;
         2196 
         2197                 l = gopher_parsemenu(gopherbuffer, &g);
         2198                 if (l == -1)
         2199                         return 1;
         2200 
         2201                 l = gopher_removeentries(g, l);
         2202 
         2203                 generatemap(host, port, selector, g, l);
         2204 /*
         2205                 rendermap();
         2206 */
         2207 
         2208 /*
         2209                 loop();
         2210 */
         2211         } while (0);
         2212 
         2213 /*
         2214         if (tcsetattr(0, TCSANOW, &termios) == -1)
         2215                 return 1;
         2216 */
         2217 
         2218         return 0;
         2219 }