Add gopher hackathon. - brcon2024-hackathons - Bitreichcon 2024 Hackathons
(HTM) git clone git://bitreich.org/brcon2024-hackathons git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/brcon2024-hackathons
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) Submodules
---
(DIR) commit ead4dcfa2976dac07e8fe03d23fa19424922b771
(DIR) parent 14f1a3a9baed42894753c5378cd2b3eb7ead1d4d
(HTM) Author: Christoph Lohmann <20h@r-36.net>
Date: Fri, 2 Aug 2024 15:45:14 +0200
Add gopher hackathon.
Diffstat:
A gopher/README.md | 28 ++++++++++++++++++++++++++++
A gopher/gopher-clustering/ga.c | 206 +++++++++++++++++++++++++++++++
A gopher/gopher-clustering/gopher-cl… | 2219 +++++++++++++++++++++++++++++++
A gopher/gopher-clustering/gopher-cl… | 1165 +++++++++++++++++++++++++++++++
A gopher/gopher-clustering/hint.md | 9 +++++++++
5 files changed, 3627 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/gopher/README.md b/gopher/README.md
@@ -0,0 +1,28 @@
+# Gopher Hackathon
+
+## GopherVR
+
+Recreate GopherVR with modern methods:
+
+ https://github.com/michael-lazar/gopherVR
+ https://github.com/michael-lazar/gopherVR-models
+
+Here is more modern exports:
+
+ https://www.khronos.org/gltf/
+
+Just plumb gltf files into some usable game engine?
+
+ https://godotengine.org/article/we-should-all-use-gltf-20-export-3d-assets-game-engines/
+
+## Gopher Clustering
+
+In gopher-clustering/ you find ideas by pazz0 for how to scan gopher
+menus and allow some clustering to topics.
+
+How can this be optimized and made a common algorithm?
+
+This will help further gopher automated development and new UIs, like
+GopherVR.
+
+
(DIR) diff --git a/gopher/gopher-clustering/ga.c b/gopher/gopher-clustering/ga.c
@@ -0,0 +1,206 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct machine {
+ uint8_t a;
+
+ uint8_t loads;
+ uint8_t adds;
+ uint8_t subs;
+ uint8_t times;
+ uint8_t noops;
+
+ uint8_t instr;
+};
+
+enum {
+ LOAD,
+ ADD,
+ SUB,
+ TIMES,
+ END,
+};
+
+#define CODE_LENGTH 16
+#define POOL_LENGTH 16
+struct genome {
+ uint8_t code[CODE_LENGTH + 1]; /* +1 sentinal (0) */
+ uint16_t fitness;
+} pool[POOL_LENGTH];
+
+uint8_t newcode[CODE_LENGTH];
+
+void
+execute(struct machine *m, size_t l, uint8_t code[])
+{
+ size_t ip;
+ int16_t c;
+
+ for (ip = 0; ip < l;) {
+ switch (code[ip++] & 0x7) {
+ case LOAD:
+ m->loads++;
+ m->a = code[ip++];
+ break;
+ case ADD:
+ m->adds++;
+ c = (int16_t)m->a + code[ip++];
+ if (c > 255)
+ m->a = 0;
+ else
+ m->a = c;
+ break;
+ case SUB:
+ m->subs++;
+ c = (int16_t)m->a - code[ip++];
+ if (c < 0)
+ m->a = 0;
+ else
+ m->a = c;
+ break;
+ case TIMES:
+ m->times++;
+ m->a = (uint16_t)m->a * code[ip++] / 255;
+ break;
+ case END:
+ m->instr++;
+ return;
+ break;
+ default:
+ m->noops++;
+ break;
+ }
+ m->instr++;
+ }
+}
+
+void
+mutate(uint8_t code[], size_t l)
+{
+ uint8_t i, bi, j;
+
+ for (j = 0; j < 4; j++) {
+ if (rand() % 100 >= 50)
+ continue;
+
+ i = rand() % CODE_LENGTH;
+ bi = rand() % 8;
+
+ code[i] ^= 1 << bi;
+ }
+}
+
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+void
+cross(uint8_t a[], size_t al, uint8_t b[], size_t bl, uint8_t c[], size_t cl)
+{
+ uint8_t i, si, sl;
+ uint8_t *one, *two;
+
+ if (rand() % 2) {
+ one = a;
+ two = b;
+ } else {
+ one = b;
+ two = a;
+ }
+
+ si = rand() % (min(al, bl) + 1);
+ sl = rand() % (min(al, bl) - si + 1);
+
+ for (i = 0; i < si; i++)
+ c[i] = one[i];
+
+ for (; i < si + sl; i++)
+ c[i] = two[i];
+
+ for (; i < cl; i++)
+ c[i] = one[i];
+}
+
+void
+cross2(uint8_t a[], size_t al, uint8_t b[], size_t bl, uint8_t c[], size_t cl)
+{
+ uint8_t i;
+
+ for (i = 0; i < al; i++)
+ c[i] = a[i] + b[i];
+}
+
+void
+printcode(uint8_t code[], size_t l)
+{
+ uint8_t i;
+
+ for (i = 0; i < l;) {
+ switch (code[i++] & 0x7) {
+ case LOAD:
+ printf("LOAD %d ", code[i++]);
+ break;
+ case ADD:
+ printf("ADD %d ", code[i++]);
+ break;
+ case SUB:
+ printf("SUB %d ", code[i++]);
+ break;
+ case TIMES:
+ printf("TIMES %d ", code[i++]);
+ break;
+ case END:
+ printf("END ");
+ return;
+ break;
+ default:
+ printf("NOOP ");
+ break;
+ }
+ }
+}
+
+int
+main(void)
+{
+ struct machine m;
+ struct genome *g1, *g2, *w1;
+ size_t i, j;
+
+ for (i = 0; i < POOL_LENGTH; i++) {
+ for (j = 0; j < CODE_LENGTH; j++)
+ pool[i].code[j] = rand();
+ pool[i].code[j] = 0;
+ }
+
+ do {
+ g1 = g2 = w1 = NULL;
+
+ for (i = 0; i < POOL_LENGTH; i++) {
+ memset(&m, 0, sizeof(m));
+
+ execute(&m, CODE_LENGTH, pool[i].code);
+ pool[i].fitness = m.a * 255 / (m.instr + m.loads);
+
+ if (!g1 || g1->fitness < pool[i].fitness) {
+ g2 = g1;
+ g1 = &pool[i];
+ } else if (!g2 || g2->fitness < pool[i].fitness) {
+ w1 = g2;
+ g2 = &pool[i];
+ } else if (!w1 || w1->fitness > pool[i].fitness) {
+ w1 = &pool[i];
+ }
+ }
+
+ printf("g1->fitness = %d, g2->fitness = %d, w1->fitness = %d\n", g1->fitness, g2->fitness, w1->fitness);
+ cross(g1->code, CODE_LENGTH, g2->code, CODE_LENGTH, newcode, CODE_LENGTH);
+ mutate(newcode, CODE_LENGTH);
+
+ printcode(newcode, CODE_LENGTH);
+ puts("");
+
+ memcpy(w1->code, newcode, CODE_LENGTH);
+ } while (1);
+}
(DIR) diff --git a/gopher/gopher-clustering/gopher-clustering.c b/gopher/gopher-clustering/gopher-clustering.c
@@ -0,0 +1,2219 @@
+#define _POSIX_C_SOURCE 200112L
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <math.h>
+
+#include <netdb.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <termios.h>
+#include <unistd.h>
+
+uint32_t
+fnv1a(int n,...)
+{
+ int i;
+ char *s;
+ va_list l;
+ uint32_t h;
+
+ h = 0x811c9dc5;
+
+ va_start(l, n);
+ for (i = 0; i < n; i++) {
+ for (s = va_arg(l, char*); *s; s++) {
+ h ^= *s;
+ h *= 0x01000193;
+ }
+ }
+ va_end(l);
+
+ return h;
+}
+
+uint32_t
+xorshift(uint32_t *s)
+{
+ *s ^= *s << 13;
+ *s ^= *s >> 17;
+ *s ^= *s << 5;
+ return *s;
+}
+
+/*
+ https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785
+*/
+uint32_t
+mix(uint32_t x)
+{
+ x ^= x >> 16;
+ x *= 0x21f0aaad;
+ x ^= x >> 15;
+ x *= 0x735a2d97;
+ x ^= x >> 16;
+ return x;
+}
+
+struct cell {
+ void *data;
+ char c;
+};
+
+#define MAPHEIGHT (50)
+#define MAPWIDTH (160)
+struct cell map[MAPHEIGHT][MAPWIDTH];
+
+struct gopherentry {
+ struct gopherentry *next;
+ char type;
+ char *fields[4];
+ size_t i, s;
+};
+
+char *
+gopher_fieldend(char *s)
+{
+ for (; *s; s++) {
+ if (*s == '\t')
+ break;
+ if (*s == '\r' && *(s+1) == '\n')
+ break;
+ if (*s == '\n')
+ break;
+ }
+ return s;
+}
+
+char *
+gopher_nextentry(char *s)
+{
+ char *c;
+
+ if (c = strchr(s, '\n'))
+ return c + 1;
+ return NULL;
+}
+
+void *
+xmalloc(size_t s)
+{
+ void *p;
+
+ if (!(p = malloc(s))) {
+ perror(NULL);
+ exit(1);
+ }
+ return p;
+}
+
+void *
+xrealloc(void *p, size_t s)
+{
+ if (!(p = realloc(p, s))) {
+ perror(NULL);
+ exit(1);
+ }
+ return p;
+}
+
+/* Allow a lot of ugly things... */
+size_t
+gopher_parsemenu(char *d, struct gopherentry **g)
+{
+ ssize_t gl;
+ size_t fl, i, s;
+ char *c, *p, pt;
+ struct gopherentry *cg;
+
+ gl = 0;
+ cg = NULL;
+
+ s = 0;
+ pt = 0;
+ for (c = d; c && *c; c = gopher_nextentry(c)) {
+ if (*c == '.')
+ break;
+ if (*c == '\n')
+ continue;
+
+ gl++;
+ cg = xrealloc(cg, gl * sizeof(struct gopherentry));
+ memset(&cg[gl-1], 0, sizeof(struct gopherentry));
+
+ if (*c != 'i' && pt == 'i')
+ s++;
+ pt = *c;
+
+ cg[gl-1].type = *c;
+ cg[gl-1].i = gl-1;
+ cg[gl-1].s = s;
+ c++;
+
+ for (i = 0; i < 4; i++) {
+ p = gopher_fieldend(c);
+ fl = p - c;
+ cg[gl-1].fields[i] = xmalloc(fl + 1);
+ memcpy(cg[gl-1].fields[i], c, fl);
+ cg[gl-1].fields[i][fl] = '\0';
+ if (*p != '\t')
+ break;
+ c = p + 1;
+ }
+ }
+ s++;
+ *g = cg;
+ return gl;
+}
+
+ssize_t
+gopher_removeentries(struct gopherentry *g, size_t l)
+{
+ size_t i, j;
+
+ for (i = 0; i < l;) {
+ if ((g[i].type == 'i' && g[i].type == '3') || !g[i].fields[1] || !g[i].fields[2] || !g[i].fields[3]) {
+ l--;
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g));
+ continue;
+ }
+ for (j = 0; j < i; j++) {
+ if (!strcmp(g[i].fields[1], g[j].fields[1]) &&
+ !strcmp(g[i].fields[2], g[j].fields[2]) &&
+ !strcmp(g[i].fields[3], g[j].fields[3]))
+ break;
+ }
+ if (j < i) {
+ l--;
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g));
+ continue;
+ }
+ i++;
+ }
+
+ return l;
+}
+
+ssize_t
+gopher_request(char *host, char *port, char *selector, char *buffer, size_t l)
+{
+ int fd;
+ struct addrinfo hints;
+ struct addrinfo *r, *rp;
+ ssize_t n, result, rn;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+
+ if (getaddrinfo(host, port, &hints, &r) != 0)
+ return -1;
+
+ for (rp = r; rp; rp = rp->ai_next) {
+ if ((fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol)) == -1)
+ continue;
+ if (connect(fd, rp->ai_addr, rp->ai_addrlen) != -1)
+ break;
+ close(fd);
+ }
+ freeaddrinfo(r);
+ if (!rp)
+ return -1;
+
+ result = -1;
+ if (write(fd, selector, strlen(selector)) != strlen(selector))
+ goto cleanup;
+
+ if (write(fd, "\r\n", 2) != 2)
+ goto cleanup;
+
+ n = 0;
+ while (1) {
+ if (n + 512 > l - 1)
+ goto cleanup;
+ if ((rn = read(fd, &buffer[n], 512)) == -1)
+ goto cleanup;
+ else if (!rn)
+ break;
+ n += rn;
+ }
+
+ buffer[n] = '\0';
+ result = n;
+
+cleanup:
+ close(fd);
+
+ return result;
+}
+
+size_t
+triangularnumber(size_t n)
+{
+ return (n * n + n) / 2;
+}
+
+size_t
+distanceindex(size_t i, size_t j)
+{
+ size_t t;
+
+ if (i < j) {
+ t = i;
+ i = j;
+ j = t;
+ }
+
+ return triangularnumber(i) + j;
+}
+
+/*
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
+ Which references https://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm-2
+ The code there is using the CPOL: https://www.codeproject.com/info/cpol10.aspx
+*/
+size_t
+levenshteindistance(char *a, char *b)
+{
+ size_t i, j, dc, ic, sc, r;
+ size_t *v0, *v1, *t;
+
+ v0 = xmalloc((strlen(b) + 1) * sizeof(*v0));
+ v1 = xmalloc((strlen(b) + 1) * sizeof(*v1));
+
+ for (i = 0; i <= strlen(b); i++)
+ v0[i] = i;
+
+ for (i = 0; i < strlen(a); i++) {
+ v1[0] = i + 1;
+ for (j = 0; j < strlen(b); j++) {
+ dc = v0[j + 1] + 1;
+ ic = v1[j] + 1;
+ if (a[i] == b[j])
+ sc = v0[j];
+ else
+ sc = v0[j] + 1;
+ if (dc < ic && dc < sc)
+ v1[j + 1] = dc;
+ else if (ic < dc && ic < sc)
+ v1[j + 1] = ic;
+ else
+ v1[j + 1] = sc;
+ }
+ t = v0;
+ v0 = v1;
+ v1 = t;
+ }
+
+ r = v0[strlen(b)];
+
+ free(v1);
+ free(v0);
+
+ return r;
+}
+
+/*
+ Remove common prefix and suffix. The pazz0distance is the sum of the length of the leftover strings.
+*/
+size_t
+pazz0distance(char *a, char *b)
+{
+ size_t pl, sl;
+ size_t al, bl;
+
+ al = strlen(a);
+ bl = strlen(b);
+
+ for (pl = 0; pl < al && pl < bl && a[pl] == b[pl]; pl++);
+ for (sl = 0; al - sl > pl && bl - sl > pl && a[al - 1 - sl] == b[bl - 1 - sl]; sl++);
+
+ return al + bl - 2 * (pl + sl);
+}
+
+struct point {
+ double dims[2];
+};
+
+void
+sammon(double *distances, size_t l, uint32_t prng, struct point *points)
+{
+ size_t i, j, k, t, tmax;
+ double *gnarf;
+ double c, lr;
+ double e1, e2, sum, lasterror;
+ struct point *newpoints;
+
+ gnarf = calloc(triangularnumber(l), sizeof(*gnarf));
+ newpoints = calloc(l, sizeof(*newpoints));
+
+ for (i = 0; i < l; i++) {
+ points[i].dims[0] = (double)xorshift(&prng) / UINT32_MAX - 0.5;
+ points[i].dims[1] = (double)xorshift(&prng) / UINT32_MAX - 0.5;
+ }
+
+ c = 0;
+ for (i = 0; i < l; i++) {
+ gnarf[distanceindex(i, i)] = 0;
+ for (j = 0; j < i; j++) {
+ gnarf[distanceindex(i, j)] = sqrt(pow(points[i].dims[0] - points[j].dims[0], 2) + pow(points[i].dims[1] - points[j].dims[1], 2));
+ c += distances[distanceindex(i, j)];
+ }
+ }
+
+ lasterror = 0;
+ for (t = 0, tmax = 1 << 12; t < tmax; t++) {
+ lr = 0.01;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < 2; j++) {
+ e1 = -2. / c;
+ sum = 0;
+ for (k = 0; k < l; k++) {
+ if (i == k)
+ continue;
+ sum += (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * (points[i].dims[j] - points[k].dims[j]);
+ }
+ e1 *= sum;
+
+ e2 = -2. / c;
+ sum = 0;
+ for (k = 0; k < l; k++) {
+ if (i == k)
+ continue;
+ sum += 1. / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * ((distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) - pow(points[i].dims[j] - points[k].dims[j], 2) / gnarf[distanceindex(i, k)] * (1 + (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / gnarf[distanceindex(i, k)]));
+ }
+ e2 *= sum;
+
+ newpoints[i].dims[j] = points[i].dims[j] - lr * (e1 / fabs(e2));
+ }
+ }
+
+ sum = 0;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < i; j++) {
+ gnarf[distanceindex(i, j)] = sqrt(pow(newpoints[i].dims[0] - newpoints[j].dims[0], 2) + pow(newpoints[i].dims[1] - newpoints[j].dims[1], 2));
+ sum += pow(distances[distanceindex(i, j)] - gnarf[distanceindex(i, j)], 2) / distances[distanceindex(i, j)];
+ }
+ }
+printf("%lf\n", sum / c);
+if (t > 0 && fabs(sum / c - lasterror) < 0.000000000001)
+ break;
+lasterror = sum / c;
+
+ memcpy(points, newpoints, l * sizeof(*points));
+ }
+
+ free(newpoints);
+ free(gnarf);
+}
+
+int
+dynmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
+{
+ size_t i, j, n, ck;
+ ssize_t m1, m, m2;
+ size_t ti, th, huh;
+ size_t iter;
+ double dsum, d, d2, gnarf, tdsum;
+ struct {
+ size_t medoid;
+ size_t n;
+ double dsum;
+ } *clusters;
+ struct {
+ size_t n1, n2, n3;
+ double d1, d2, d3;
+ } *cache;
+ double *dS, *dS2;
+ double cdS, cdS2;
+ size_t cm, cx;
+ ssize_t lx;
+ int flag, result;
+ uint32_t r;
+ double *C;
+
+ if (!l)
+ return 0;
+
+ if (l < k)
+ k = l;
+
+ clusters = calloc(k, sizeof(*clusters));
+ if (!clusters)
+ return -1;
+
+ result = -1;
+
+/*
+ Seed the medoids randomly
+*/
+ for (i = n = 0; i < l; i++) {
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
+ clusters[n++].medoid = i;
+ if (n == k)
+ break;
+ }
+
+ cache = calloc(l, sizeof(*cache));
+ if (!cache)
+ goto cleanup;
+
+ dS = calloc(k, sizeof(*dS));
+ if (!dS)
+ goto cleanup;
+
+ dS2 = calloc(k, sizeof(*dS2));
+ if (!dS2)
+ goto cleanup;
+
+while (k > 3) {
+ for (i = 0; i < l; i++) {
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
+ for (j = 0; j < k; j++) {
+ d = distances[distanceindex(i, clusters[j].medoid)];
+ if (d < cache[i].d1) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = cache[i].d1;
+ cache[i].d1 = d;
+ cache[i].n3 = cache[i].n2;
+ cache[i].n2 = cache[i].n1;
+ cache[i].n1 = j;
+ } else if (d < cache[i].d2) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = d;
+ cache[i].n3 = cache[i].n2;
+ cache[i].n2 = j;
+ } else if (d < cache[i].d3) {
+ cache[i].d3 = d;
+ cache[i].n3 = j;
+ }
+ }
+ }
+
+ for (i = 0; i < k; i++) {
+ dS[i] = 0;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
+ }
+ for (j = 0; j < l; j++) {
+ if (cache[j].n2 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
+ }
+ }
+
+ lx = -1;
+ for (iter = 0; iter < 100; iter++) {
+ cdS = 0;
+ for (i = 0; i < l; i++) {
+ if (i == lx)
+ goto fastermsc_finished;
+
+ for (j = 0; j < k; j++) {
+ if (clusters[j].medoid == i)
+ goto fastmsc_nexti;
+ }
+
+ memcpy(dS2, dS, k * sizeof(*dS2));
+ cdS2 = 0;
+ for (j = 0; j < l; j++) {
+ d = distances[distanceindex(i, j)];
+ if (d < cache[j].d1) {
+ cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
+ dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d2) {
+ cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
+ dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d3) {
+ dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d;
+ }
+ }
+ d = -INFINITY;
+ for (j = 0; j < k; j++) {
+ if (dS2[j] > d) {
+ d = dS2[j];
+ m = j;
+ }
+ }
+ dS2[m] += cdS2;
+ if (dS2[m] <= 0)
+ continue;
+//printf("%lf\t%lf\n", dS2[m], cdS2);
+printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i);
+ lx = i;
+ clusters[m].medoid = i;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m)
+ continue;
+ cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY;
+ for (n = 0; n < k; n++) {
+ d = distances[distanceindex(j, clusters[n].medoid)];
+ if (d < cache[j].d1) {
+ cache[j].d3 = cache[j].d2;
+ cache[j].d2 = cache[j].d1;
+ cache[j].d1 = d;
+ cache[j].n3 = cache[j].n2;
+ cache[j].n2 = cache[j].n1;
+ cache[j].n1 = n;
+ } else if (d < cache[j].d2) {
+ cache[j].d3 = cache[j].d2;
+ cache[j].d2 = d;
+ cache[j].n3 = cache[j].n2;
+ cache[j].n2 = n;
+ } else if (d < cache[j].d3) {
+ cache[j].d3 = d;
+ cache[j].n3 = n;
+ }
+ }
+ }
+ for (j = 0; j < k; j++) {
+ dS[j] = 0;
+ for (n = 0; n < l; n++) {
+ if (cache[n].n1 != j)
+ continue;
+ dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3;
+ }
+ for (n = 0; n < l; n++) {
+ if (cache[n].n2 != j)
+ continue;
+ dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3;
+ }
+ }
+printf("lx = %lu\n", i);
+fastmsc_nexti:;
+ }
+ if (lx == -1)
+ break;
+ }
+fastermsc_finished:;
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (dS2[j] < d) {
+ d = dS2[j];
+ m = j;
+ }
+ }
+ if (m < k - 1)
+ memmove(&clusters[m], &clusters[m + 1], k - m - 1);
+ k--;
+printf("k = %lu\n", k);
+}
+
+ for (i = 0; i < k; i++)
+ clusters[i].n = 0;
+
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ assoc[i] = j;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ clusters[assoc[i]].n++;
+ }
+
+ dsum = 0;
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ d2 = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ d2 = d;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
+ d2 = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ if (clusters[assoc[i]].medoid != i)
+ dsum += 1 - d / (d2 + 0.000000000001);
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
+ }
+ printf("Silhouette = %lf\n", dsum / (l - k));
+
+ result = k;
+cleanup:
+ free(clusters);
+ return result;
+}
+
+int
+fastermsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
+{
+ size_t i, j, n, ck;
+ ssize_t m1, m, m2;
+ size_t ti, th, huh;
+ double dsum, d, d2, gnarf, tdsum;
+ struct {
+ size_t medoid;
+ size_t n;
+ double dsum;
+ } *clusters;
+ struct {
+ size_t n1, n2, n3;
+ double d1, d2, d3;
+ } *cache;
+ double *dS, *dS2;
+ double cdS, cdS2;
+ size_t cm, cx;
+ ssize_t lx;
+ int flag, result;
+ uint32_t r;
+ double *C;
+
+ if (!l)
+ return 0;
+
+ if (l < k)
+ k = l;
+
+ clusters = calloc(k, sizeof(*clusters));
+ if (!clusters)
+ return -1;
+
+ result = -1;
+
+/*
+ Seed the medoids randomly
+*/
+ for (i = n = 0; i < l; i++) {
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
+ clusters[n++].medoid = i;
+ if (n == k)
+ break;
+ }
+
+ cache = calloc(l, sizeof(*cache));
+ if (!cache)
+ goto cleanup;
+
+ dS = calloc(k, sizeof(*dS));
+ if (!dS)
+ goto cleanup;
+
+ dS2 = calloc(k, sizeof(*dS2));
+ if (!dS2)
+ goto cleanup;
+
+ for (i = 0; i < l; i++) {
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
+ for (j = 0; j < k; j++) {
+ d = distances[distanceindex(i, clusters[j].medoid)];
+ if (d < cache[i].d1) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = cache[i].d1;
+ cache[i].d1 = d;
+ cache[i].n3 = cache[i].n2;
+ cache[i].n2 = cache[i].n1;
+ cache[i].n1 = j;
+ } else if (d < cache[i].d2) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = d;
+ cache[i].n3 = cache[i].n2;
+ cache[i].n2 = j;
+ } else if (d < cache[i].d3) {
+ cache[i].d3 = d;
+ cache[i].n3 = j;
+ }
+ }
+ }
+
+ for (i = 0; i < k; i++) {
+ dS[i] = 0;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
+ }
+ for (j = 0; j < l; j++) {
+ if (cache[j].n2 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
+ }
+ }
+
+ lx = -1;
+ for (;;) {
+ cdS = 0;
+ for (i = 0; i < l; i++) {
+ if (i == lx)
+ goto fastermsc_finished;
+
+ for (j = 0; j < k; j++) {
+ if (clusters[j].medoid == i)
+ goto fastmsc_nexti;
+ }
+
+ memcpy(dS2, dS, k * sizeof(*dS2));
+ cdS2 = 0;
+ for (j = 0; j < l; j++) {
+ d = distances[distanceindex(i, j)];
+ if (d < cache[j].d1) {
+ cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
+ dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d2) {
+ cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
+ dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d3) {
+ dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d;
+ }
+ }
+ d = -INFINITY;
+ for (j = 0; j < k; j++) {
+ if (dS2[j] > d) {
+ d = dS2[j];
+ m = j;
+ }
+ }
+ dS2[m] += cdS2;
+ if (dS2[m] <= 0)
+ continue;
+//printf("%lf\t%lf\n", dS2[m], cdS2);
+printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i);
+ lx = i;
+ clusters[m].medoid = i;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m)
+ continue;
+ cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY;
+ for (n = 0; n < k; n++) {
+ d = distances[distanceindex(j, clusters[n].medoid)];
+ if (d < cache[j].d1) {
+ cache[j].d3 = cache[j].d2;
+ cache[j].d2 = cache[j].d1;
+ cache[j].d1 = d;
+ cache[j].n3 = cache[j].n2;
+ cache[j].n2 = cache[j].n1;
+ cache[j].n1 = n;
+ } else if (d < cache[j].d2) {
+ cache[j].d3 = cache[j].d2;
+ cache[j].d2 = d;
+ cache[j].n3 = cache[j].n2;
+ cache[j].n2 = n;
+ } else if (d < cache[j].d3) {
+ cache[j].d3 = d;
+ cache[j].n3 = n;
+ }
+ }
+ }
+ for (j = 0; j < k; j++) {
+ dS[j] = 0;
+ for (n = 0; n < l; n++) {
+ if (cache[n].n1 != j)
+ continue;
+ dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3;
+ }
+ for (n = 0; n < l; n++) {
+ if (cache[n].n2 != j)
+ continue;
+ dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3;
+ }
+ }
+printf("lx = %lu\n", i);
+fastmsc_nexti:;
+ }
+ if (lx == -1)
+ break;
+ }
+fastermsc_finished:;
+
+ for (i = 0; i < k; i++)
+ clusters[i].n = 0;
+
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ assoc[i] = j;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ clusters[assoc[i]].n++;
+ }
+
+ dsum = 0;
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ d2 = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ d2 = d;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
+ d2 = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ if (clusters[assoc[i]].medoid != i)
+ dsum += 1 - d / (d2 + 0.000000000001);
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
+ }
+ printf("Silhouette = %lf\n", dsum / (l - k));
+
+ result = k;
+cleanup:
+ free(clusters);
+ return result;
+}
+
+int
+fastmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
+{
+ size_t i, j, n, ck;
+ ssize_t m1, m, m2;
+ size_t ti, th;
+ double dsum, d, d2, gnarf, tdsum;
+ struct {
+ size_t medoid;
+ size_t n;
+ double dsum;
+ } *clusters;
+ struct {
+ size_t n1, n2;
+ double d1, d2, d3;
+ } *cache;
+ double *dS, *dS2;
+ double cdS, cdS2;
+ size_t cm, cx;
+ int flag, result;
+ uint32_t r;
+ double *C;
+
+ if (!l)
+ return 0;
+
+ if (l < k)
+ k = l;
+
+ clusters = calloc(k, sizeof(*clusters));
+ if (!clusters)
+ return -1;
+
+ result = -1;
+
+/*
+ seed the medoids using PAM BUILD
+*/
+ d = INFINITY;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += distances[distanceindex(i, j)];
+ if (dsum < d) {
+ m = i;
+ d = dsum;
+ }
+ }
+ if (m == -1)
+ goto cleanup;
+ ck = 0;
+ clusters[ck].medoid = m;
+ ck++;
+
+ C = xmalloc(l * l * sizeof(*C));
+
+ for (; ck < k; ck++) {
+ memset(C, 0, l * l * sizeof(*C));
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti;
+ }
+ for (j = 0; j < l; j++) {
+ if (i == j)
+ continue;
+ for (n = 0; n < ck; n++) {
+ if (clusters[n].medoid == j)
+ goto build_nextj;
+ }
+ d = INFINITY;
+ for (n = 0; n < ck; n++) {
+ if (distances[distanceindex(j, clusters[n].medoid)] < d)
+ d = distances[distanceindex(j, clusters[n].medoid)];
+ }
+ C[j * l + i] = d - distances[distanceindex(j, i)];
+ if (C[j * l + i] < 0)
+ C[j * l + i] = 0;
+build_nextj:;
+ }
+build_nexti:;
+ }
+ d = -1;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti2;
+ }
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += C[j * l + i];
+ if (dsum > d) {
+ d = dsum;
+ m = i;
+ }
+build_nexti2:;
+ }
+ clusters[ck].medoid = m;
+ }
+ free(C);
+
+/*
+ Seed the medoids randomly
+ for (i = n = 0; i < l; i++) {
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
+ clusters[n++].medoid = i;
+ if (n == k)
+ break;
+ }
+*/
+
+ cache = calloc(l, sizeof(*cache));
+ if (!cache)
+ goto cleanup;
+
+ dS = calloc(k, sizeof(*dS));
+ if (!dS)
+ goto cleanup;
+
+ dS2 = calloc(k, sizeof(*dS2));
+ if (!dS2)
+ goto cleanup;
+
+ for (;;) {
+ for (i = 0; i < l; i++) {
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
+ for (j = 0; j < k; j++) {
+ d = distances[distanceindex(i, clusters[j].medoid)];
+ if (d < cache[i].d1) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = cache[i].d1;
+ cache[i].d1 = d;
+ cache[i].n2 = cache[i].n1;
+ cache[i].n1 = j;
+ } else if (d < cache[i].d2) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = d;
+ cache[i].n2 = j;
+ } else if (d < cache[i].d3) {
+ cache[i].d3 = d;
+ }
+ }
+ }
+
+ for (i = 0; i < k; i++) {
+ dS[i] = 0;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
+ }
+ for (j = 0; j < l; j++) {
+ if (cache[j].n2 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
+ }
+ }
+
+ cdS = 0;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < k; j++) {
+ if (clusters[j].medoid == i)
+ goto fastmsc_nexti;
+ }
+ memcpy(dS2, dS, k * sizeof(*dS2));
+ cdS2 = 0;
+ for (j = 0; j < l; j++) {
+ d = distances[distanceindex(i, j)];
+ if (d < cache[j].d1) {
+ cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
+ dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d2) {
+ cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
+ dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d3) {
+ dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d;
+ }
+ }
+ d = -INFINITY;
+ d2 = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (dS2[j] > d) {
+ d = dS2[j];
+ m = j;
+ }
+ if (dS2[j] < d2) {
+ d2 = dS2[j];
+ m2 = j;
+ }
+ }
+printf("%lf (%ld)\t%lf (%ld)\n", d, m, d2, m2);
+ dS2[m] += cdS2;
+ if (dS2[m] > cdS) {
+ cdS = dS2[m];
+ cm = m;
+ cx = i;
+ }
+fastmsc_nexti:;
+ }
+ if (cdS <= 0)
+ break;
+ clusters[cm].medoid = cx;
+ }
+
+ for (i = 0; i < k; i++)
+ clusters[i].n = 0;
+
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ assoc[i] = j;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ clusters[assoc[i]].n++;
+ }
+
+ dsum = 0;
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ d2 = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ d2 = d;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
+ d2 = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ if (clusters[assoc[i]].medoid != i)
+ dsum += 1 - d / (d2 + 0.000000000001);
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
+ }
+ printf("Silhouette = %lf\n", dsum / (l - k));
+
+ result = k;
+cleanup:
+ free(clusters);
+ return result;
+}
+
+/*
+ Seeding of medoids based on the PAM BUILD description in https://www.cs.umb.edu/cs738/pam1.pdf
+ I was too stupid to understand other descriptions.
+*/
+int
+kmedoids(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
+{
+ size_t i, j, n, ck;
+ ssize_t m1, m;
+ size_t ti, th;
+ double dsum, d, d2, gnarf, tdsum;
+ struct {
+ size_t medoid;
+ size_t n;
+ double dsum;
+ } *clusters;
+ int flag, result;
+ uint32_t r;
+ double *C;
+
+ if (!l)
+ return 0;
+
+ if (l < k)
+ k = l;
+
+ clusters = calloc(k, sizeof(*clusters));
+ if (!clusters)
+ return -1;
+
+ result = -1;
+
+ /* seed the medoids using PAM BUILD */
+ d = INFINITY;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += distances[distanceindex(i, j)];
+ if (dsum < d) {
+ m = i;
+ d = dsum;
+ }
+ }
+ if (m == -1)
+ goto cleanup;
+ ck = 0;
+ clusters[ck].medoid = m;
+ ck++;
+
+ C = xmalloc(l * l * sizeof(*C));
+
+ for (; ck < k; ck++) {
+ memset(C, 0, l * l * sizeof(*C));
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti;
+ }
+ for (j = 0; j < l; j++) {
+ if (i == j)
+ continue;
+ for (n = 0; n < ck; n++) {
+ if (clusters[n].medoid == j)
+ goto build_nextj;
+ }
+ d = INFINITY;
+ for (n = 0; n < ck; n++) {
+ if (distances[distanceindex(j, clusters[n].medoid)] < d)
+ d = distances[distanceindex(j, clusters[n].medoid)];
+ }
+ C[j * l + i] = d - distances[distanceindex(j, i)];
+ if (C[j * l + i] < 0)
+ C[j * l + i] = 0;
+build_nextj:;
+ }
+build_nexti:;
+ }
+ d = -1;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti2;
+ }
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += C[j * l + i];
+ if (dsum > d) {
+ d = dsum;
+ m = i;
+ }
+build_nexti2:;
+ }
+ clusters[ck].medoid = m;
+ }
+ free(C);
+
+/*
+ Seed the medoids randomly
+ for (i = n = 0; i < l; i++) {
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
+ clusters[n++].medoid = i;
+ if (n == k)
+ break;
+ }
+*/
+
+ for (;;) {
+ tdsum = INFINITY;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < l; j++) {
+ for (n = 0; n < k; n++) {
+ if (j == clusters[n].medoid)
+ goto swap_nextj;
+ }
+ dsum = 0;
+ for (n = 0; n < l; n++) {
+ if (n == j)
+ continue;
+ for (m = 0; m < k; m++) {
+ if (clusters[m].medoid == n)
+ goto swap_nextn;
+ }
+ d = INFINITY;
+ d2 = INFINITY;
+ for (m = 0; m < k; m++) {
+ if (distances[distanceindex(clusters[m].medoid, n)] < d) {
+ d2 = d;
+ d = distances[distanceindex(clusters[m].medoid, n)];
+ } else if (distances[distanceindex(clusters[m].medoid, n)] < d2) {
+ d2 = distances[distanceindex(clusters[m].medoid, n)];
+ }
+ }
+/*
+printf("%lf\t%lf\t%lf\t%lf\n", distances[distanceindex(clusters[i].medoid, n)], distances[distanceindex(j, n)], d, d2);
+*/
+ if (distances[distanceindex(clusters[i].medoid, n)] > d) {
+ if (distances[distanceindex(j, n)] >= d)
+ gnarf = 0;
+ else
+ gnarf = distances[distanceindex(j, n)] - d;
+ } else if (distances[distanceindex(clusters[i].medoid, n)] == d) {
+ if (distances[distanceindex(j, n)] < d2)
+ gnarf = distances[distanceindex(j, n)] - d;
+ else
+ gnarf = d2 - d;
+ } else {
+ continue;
+ }
+ dsum += gnarf;
+swap_nextn:;
+ }
+ if (dsum < tdsum) {
+ ti = i;
+ th = j;
+ tdsum = dsum;
+/*
+printf("%lu\t%lu\t%lf\n", clusters[ti].medoid, th, tdsum);
+*/
+ }
+swap_nextj:;
+ }
+ }
+ if (tdsum >= 0)
+ break;
+ clusters[ti].medoid = th;
+ }
+
+ for (i = 0; i < k; i++)
+ clusters[i].n = 0;
+
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ assoc[i] = j;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ clusters[assoc[i]].n++;
+ }
+
+ dsum = 0;
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ d2 = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ d2 = d;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) {
+ d2 = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ if (clusters[assoc[i]].medoid != i)
+ dsum += 1 - d / (d2 + 0.000000000001);
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]);
+ if (clusters[assoc[i]].medoid == i)
+ assoc[i] |= 128;
+ }
+ printf("Silhouette = %lf\n", dsum / (l - k));
+
+/*
+ while (1) {
+ flag = 0;
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ m = assoc[i];
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ assoc[i] = j;
+ }
+ }
+ if (assoc[i] != m)
+ flag = 1;
+ }
+ if (!flag)
+ break;
+
+ for (i = 0; i < k; i++)
+ clusters[i].dsum = INFINITY;
+
+ for (i = 0; i < l; i++) {
+ dsum = 0;
+ for (j = 0; j < l; j++) {
+ if (assoc[i] != assoc[j])
+ continue;
+ dsum += distanceindex(i, j);
+ }
+ if (dsum < clusters[assoc[i]].dsum) {
+ clusters[assoc[i]].dsum = dsum;
+ clusters[assoc[i]].medoid = i;
+ }
+ }
+ }
+*/
+ result = k;
+cleanup:
+ free(clusters);
+ return result;
+}
+
+struct room {
+ struct room *p;
+ void *d;
+ size_t x, y;
+ size_t w, h;
+};
+
+int
+generaterooms_collides(size_t x1, size_t y1, size_t w1, size_t h1,
+ size_t x2, size_t y2, size_t w2, size_t h2,
+ size_t margin)
+{
+ return !((y1 >= y2 + h2 + margin || y2 >= y1 + h1 + margin) || (x1 >= x2 + w2 + margin || x2 >= x1 + w1 + margin));
+}
+
+int
+generaterooms_isfree(struct room *rs, size_t l, size_t x, size_t y, size_t w, size_t h, size_t margin)
+{
+ size_t i;
+
+ for (i = 0; i < l; i++) {
+ if (generaterooms_collides(rs[i].x, rs[i].y, rs[i].w, rs[i].h,
+ x, y, w, h, margin))
+ return 0;
+ }
+ return 1;
+}
+
+struct rect {
+ struct rect *next, *next2;
+ struct room *room;
+ size_t x1, y1;
+ size_t x2, y2;
+ size_t d;
+};
+
+struct rect *
+generaterooms_gnarf_randomneighbor(struct rect *x, struct rect *rs, uint32_t *prng)
+{
+ struct rect *r, *result;
+ size_t n;
+
+ n = 0;
+ result = NULL;
+ for (r = rs; r; r = r->next) {
+ if (r->y2 < x->y1 || r->y1 > x->y2 || r->x2 < x->x1 || r->x1 > x->x2)
+ continue;
+ if ((r->y2 == x->y1 || r->y1 == x->y2) && (r->x2 == x->x1 || r->x1 == x->x2))
+ continue;
+ n++;
+ if (xorshift(prng) / (1. + UINT32_MAX) < 1. / n)
+ result = r;
+ }
+
+ return result;
+}
+
+#define ROOM_HEIGHT_MIN 3
+#define ROOM_WIDTH_MIN 5
+#define ROOM_MARGIN_MIN 1
+#define CELL_HEIGHT_MIN (ROOM_HEIGHT_MIN + ROOM_MARGIN_MIN + 3)
+#define CELL_WIDTH_MIN (ROOM_WIDTH_MIN + ROOM_MARGIN_MIN + 3)
+size_t
+generaterooms_gnarf(char *host, char *port, char *selector, struct room *rs, size_t l)
+{
+ struct rect *queuehead, *queuetail;
+ struct rect *r, *t;
+ struct rect *rects, *walk;
+ size_t w, h, i, j, rl, n;
+ uint32_t prng;
+ int vertical;
+ struct room *room;
+
+ prng = fnv1a(4, host, port, selector, "seed");
+
+ r = xmalloc(sizeof(*r));
+ r->x1 = r->y1 = ROOM_MARGIN_MIN;
+ r->x2 = MAPWIDTH;
+ r->y2 = MAPHEIGHT;
+ r->d = 0;
+
+ queuetail = r;
+ queuetail->next = NULL;
+ queuehead = r;
+
+ rects = NULL;
+ rl = 0;
+
+ while (queuehead) {
+ r = queuehead;
+ if (queuetail == queuehead)
+ queuetail = NULL;
+ queuehead = queuehead->next;
+
+/*
+ if (r->d >= 8) {
+ r->next = rects;
+ rects = r;
+ rl++;
+ continue;
+ }
+*/
+
+ if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2 && r->y2 - r->y1 >= CELL_HEIGHT_MIN * 2) {
+ vertical = xorshift(&prng) & 1;
+ } else if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2) {
+ vertical = 0;
+ } else if (r->y2 - r->y1 >= CELL_HEIGHT_MIN * 2) {
+ vertical = 1;
+ } else {
+ r->next = rects;
+ rects = r;
+ rl++;
+ continue;
+ }
+
+ if (vertical) {
+ w = r->x2 - r->x1;
+ h = CELL_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r->y1 - CELL_HEIGHT_MIN * 2);
+ } else {
+ w = CELL_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r->x1 - CELL_WIDTH_MIN * 2);
+ h = r->y2 - r->y1;
+ }
+
+ t = xmalloc(sizeof(*t));
+ t->x1 = r->x1;
+ t->y1 = r->y1;
+ t->x2 = r->x1 + w;
+ t->y2 = r->y1 + h;
+ t->d = r->d + 1;
+ t->next = NULL;
+ t->room = NULL;
+
+ if (!queuetail) {
+ queuehead = t;
+ queuetail = t;
+ } else {
+ queuetail->next = t;
+ queuetail = t;
+ }
+
+ t = xmalloc(sizeof(*t));
+ if (vertical) {
+ t->x1 = r->x1;
+ t->y1 = r->y1 + h;
+ } else {
+ t->x1 = r->x1 + w;
+ t->y1 = r->y1;
+ }
+ t->x2 = r->x2;
+ t->y2 = r->y2;
+ t->d = r->d + 1;
+ t->next = NULL;
+ t->room = NULL;
+
+ queuetail->next = t;
+ queuetail = t;
+
+ free(r);
+ }
+
+ if (l > rl)
+ l = rl;
+
+ for (r = rects; r; r = r->next) {
+ if (MAPHEIGHT / 2 >= r->y1 && MAPHEIGHT / 2 < r->y2 &&
+ MAPWIDTH / 2 >= r->x1 && MAPWIDTH / 2 < r->x2)
+ break;
+ }
+
+ i = 0;
+ rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM_MARGIN_MIN - ROOM_WIDTH_MIN);
+ rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r->y1 - ROOM_MARGIN_MIN - ROOM_HEIGHT_MIN);
+ rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM_MARGIN_MIN - rs[i].w);
+ rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - ROOM_MARGIN_MIN - rs[i].h);
+ rs[i].p = NULL;
+ r->room = &rs[i];
+
+ walk = r;
+ walk->next2 = NULL;
+
+ i++;
+ for (; i < l;) {
+ t = generaterooms_gnarf_randomneighbor(r, rects, &prng);
+ if (!t || t->room) {
+ n = 0;
+ for (t = walk; t; t = t->next2) {
+ n++;
+ if (xorshift(&prng) / (1. + UINT32_MAX) < 1. / n)
+ r = t;
+
+ }
+ continue;
+ }
+ rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + t->x2 - t->x1 - ROOM_MARGIN_MIN - ROOM_WIDTH_MIN);
+ rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + t->y2 - t->y1 - ROOM_MARGIN_MIN - ROOM_HEIGHT_MIN);
+ rs[i].x = t->x1 + xorshift(&prng) % (1 + t->x2 - t->x1 - ROOM_MARGIN_MIN - rs[i].w);
+ rs[i].y = t->y1 + xorshift(&prng) % (1 + t->y2 - t->y1 - ROOM_MARGIN_MIN - rs[i].h);
+ rs[i].p = r->room;
+ t->room = &rs[i];
+ i++;
+ r = t;
+ r->next2 = walk;
+ walk = r;
+ }
+
+/*
+ for (r = rects, i = 0; i < l; i++, r = r->next) {
+ rs[i].w = 3 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - 3);
+ rs[i].h = 2 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - 2);
+ rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - rs[i].w);
+ rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - rs[i].h);
+ rs[i].p = NULL;
+ }
+*/
+
+ return l;
+}
+
+#define ROOMS_MARGIN_MIN 1
+#define ROOMS_MARGIN_MAX 3
+#define MAP_MARGIN 1
+size_t
+generaterooms_stupid(char *host, char *port, char *selector, struct room *rs, size_t l)
+{
+ size_t i, j, ri, m, n;
+ ssize_t y, x;
+ ssize_t w, h;
+ char buffer[3];
+ struct room *r;
+ uint32_t prng;
+
+ rs[0].h = 4 + fnv1a(4, host, port, selector, "h") % 5;
+ rs[0].w = 4 + fnv1a(4, host, port, selector, "w") % 5;
+ rs[0].y = MAPHEIGHT / 2 - rs[0].h / 2;
+ rs[0].x = MAPWIDTH / 2 - rs[0].w / 2;
+ for (i = 1; i < l; i++) {
+ snprintf(buffer, sizeof(buffer), "%u", i);
+ prng = fnv1a(5, host, port, selector, buffer, "seed");
+ n = 0;
+ do {
+ do {
+ switch (xorshift(&prng) % 3) {
+ case 0:
+ h = 3 + xorshift(&prng) % 3;
+ w = 3 + xorshift(&prng) % 2;
+ break;
+ case 1:
+ w = 3 + xorshift(&prng) % 3;
+ h = 4 + xorshift(&prng) % 2;
+ break;
+ case 2:
+ h = w = 4 + xorshift(&prng) % 5;
+ break;
+ }
+ m = ROOMS_MARGIN_MIN + xorshift(&prng) % (1 + ROOMS_MARGIN_MAX - ROOMS_MARGIN_MIN);
+ j = xorshift(&prng) % i;
+ switch (xorshift(&prng) % 4) {
+ case 0:
+ x = rs[j].x;
+ if (w < rs[j].w)
+ x += xorshift(&prng) % (rs[j].w - w + 1);
+ else
+ x -= xorshift(&prng) % (w - rs[j].w + 1);
+ y = rs[j].y - h - m;
+ break;
+ case 1:
+ x = rs[j].x + rs[j].w + m;
+ y = rs[j].y;
+ if (h < rs[j].h)
+ y += xorshift(&prng) % (rs[j].h - h + 1);
+ else
+ y -= xorshift(&prng) % (h - rs[j].h + 1);
+ break;
+ case 2:
+ x = rs[j].x;
+ if (w < rs[j].w)
+ x += xorshift(&prng) % (rs[j].w - w + 1);
+ else
+ x -= xorshift(&prng) % (w - rs[j].w + 1);
+ y = rs[j].y + rs[j].h + m;
+ break;
+ case 3:
+ x = rs[j].x - w - m;
+ y = rs[j].y;
+ if (h < rs[j].h)
+ y += xorshift(&prng) % (rs[j].h - h + 1);
+ else
+ y -= xorshift(&prng) % (h - rs[j].h + 1);
+ break;
+ }
+ } while (x < MAP_MARGIN || x + w + MAP_MARGIN > MAPWIDTH || y < MAP_MARGIN || y + h + MAP_MARGIN > MAPHEIGHT);
+ n++;
+ } while (n <= 100 && !generaterooms_isfree(rs, i, x, y, w, h, ROOMS_MARGIN_MIN));
+ if (n > 100)
+ break;
+ rs[i].h = h;
+ rs[i].w = w;
+ rs[i].x = x;
+ rs[i].y = y;
+ rs[i].p = &rs[j];
+ }
+ return i;
+}
+
+#define ROOMS_GRID_ROWS 5
+#define ROOMS_GRID_COLS 10
+size_t
+generaterooms_grid(char *host, char *port, char *selector, struct room *rs, size_t l)
+{
+ size_t i, j, k, x, y;
+ unsigned char cells[ROOMS_GRID_ROWS * ROOMS_GRID_COLS];
+ char buffer[3];
+ uint32_t prng;
+
+ memset(cells, 0, sizeof(cells));
+
+ if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS)
+ l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS;
+
+ for (i = 0; i < l; i++) {
+ snprintf(buffer, sizeof(buffer), "%u", i);
+ prng = fnv1a(5, host, port, selector, buffer, "seed");
+ j = 0;
+ do {
+ k = xorshift(&prng) % (ROOMS_GRID_ROWS * ROOMS_GRID_COLS);
+ j++;
+ } while (j <= 100 && cells[k]);
+ if (j > 100)
+ break;
+ y = k / ROOMS_GRID_COLS;
+ x = k % ROOMS_GRID_COLS;
+ cells[k] = 1;
+ rs[i].w = 3 + xorshift(&prng) % (1 + 8 - 2 - 3);
+ rs[i].h = 3 + xorshift(&prng) % (1 + 5 - 2 - 3);
+ rs[i].x = x * 8 + 1 + xorshift(&prng) % (1 + 8 - 2 - rs[i].w);
+ rs[i].y = y * 5 + 1 + xorshift(&prng) % (1 + 5 - 2 - rs[i].h);
+/*
+printf("%u\t%u\t%u\t%u\n", rs[i].w, rs[i].h, rs[i].x, rs[i].y);
+*/
+ rs[i].p = NULL;
+ }
+
+ return i;
+}
+
+void
+generaterooms_grid2_(struct room *r, uint32_t *prng, size_t x, size_t y)
+{
+ r->w = 3 + xorshift(prng) % (1 + 8 - 2 - 3);
+ r->h = 3 + xorshift(prng) % (1 + 5 - 2 - 3);
+ r->x = x * 8 + 1 + xorshift(prng) % (1 + 8 - 2 - r->w);
+ r->y = y * 5 + 1 + xorshift(prng) % (1 + 5 - 2 - r->h);
+ r->p = NULL;
+/*
+printf("%u\t%u\t%u\t%u\n", r->w, r->h, r->x, r->y);
+*/
+}
+
+size_t
+generaterooms_grid2(char *host, char *port, char *selector, struct room *rs, size_t l)
+{
+ const struct {
+ ssize_t y, x;
+ } gnarf[8] = {
+ { -1, 0 },
+ { 0, 1 },
+ { 1, 0 },
+ { 0, -1 },
+ { -1, 1 },
+ { 1, 1 },
+ { 1, -1 },
+ { -1, -1 }
+ };
+ size_t i, j, k, m, n;
+ ssize_t x, y;
+ struct room *cells[ROOMS_GRID_ROWS][ROOMS_GRID_COLS];
+ char buffer[3];
+ uint32_t prng;
+ struct room *r;
+
+ memset(cells, 0, sizeof(cells));
+
+ if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS)
+ l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS;
+
+ prng = fnv1a(5, host, port, selector, "seed");
+ x = ROOMS_GRID_COLS / 2;
+ y = ROOMS_GRID_ROWS / 2;
+
+ r = &rs[0];
+ generaterooms_grid2_(r, &prng, x, y);
+ cells[y][x] = r;
+
+ for (i = 1; i < l; i++) {
+ for (n = 0; n < 100; n++) {
+ m = xorshift(&prng) % 4;
+ x += gnarf[m].x;
+ y += gnarf[m].y;
+ if (x < 0 || x >= ROOMS_GRID_COLS || y < 0 || y >= ROOMS_GRID_ROWS || cells[y][x]) {
+ m = xorshift(&prng) % i;
+ r = &rs[m];
+ x = r->x / 8;
+ y = r->y / 5;
+ } else {
+ generaterooms_grid2_(&rs[i], &prng, x, y);
+ rs[i].p = r;
+ cells[y][x] = r = &rs[i];
+ break;
+ }
+ }
+ if (n >= 100)
+ break;
+ }
+
+ return i;
+}
+
+size_t
+distance(size_t x1, size_t y1, size_t x2, size_t y2)
+{
+ size_t d;
+
+ if (y1 < y2)
+ d = y2 - y1;
+ else
+ d = y1 - y2;
+ if (x1 < x2)
+ d += x2 - x1;
+ else
+ d += x1 - x2;
+
+ return d;
+}
+
+void
+nearestpoints(struct room *a, struct room *b, size_t *ax, size_t *ay, size_t *bx, size_t *by)
+{
+ if (a->y >= b->y && a->y < b->y + b->h) {
+ *ay = *by = a->y;
+ } else if (b->y >= a->y && b->y < a->y + a->h) {
+ *ay = *by = b->y;
+ } else if (a->y >= b->y) {
+ *ay = a->y;
+ *by = b->y + b->h - 1;
+ } else if (b->y >= a->y) {
+ *ay = a->y + a->h - 1;
+ *by = b->y;
+ }
+
+ if (a->x >= b->x && a->x < b->x + b->w) {
+ *ax = *bx = a->x;
+ } else if (b->x >= a->x && b->x < a->x + a->w) {
+ *ax = *bx = b->x;
+ } else if (a->x >= b->x) {
+ *ax = a->x;
+ *bx = b->x + b->w - 1;
+ } else if (b->x >= a->x) {
+ *ax = a->x + a->w - 1;
+ *bx = b->x;
+ }
+}
+
+size_t
+roomdistance(struct room *a, struct room *b)
+{
+ size_t x1, y1;
+ size_t x2, y2;
+
+ nearestpoints(a, b, &x1, &y1, &x2, &y2);
+
+ return distance(x1, y1, x2, y2);
+}
+
+struct room *
+nearestroom(struct room *rs, size_t l, struct room *x, uint32_t prng)
+{
+ size_t i, d, cd;
+ ssize_t j;
+ double n;
+
+ j = -1;
+ d = UINT32_MAX;
+ for (i = 0; i < l; i++) {
+ if (&rs[i] == x)
+ continue;
+ if ((cd = roomdistance(&rs[i], x)) < d) {
+ j = i;
+ d = cd;
+ n = 1;
+ } else if (cd == d) {
+ n++;
+ if (xorshift(&prng) / (UINT32_MAX + 1.) < 1. / n)
+ j = i;
+ }
+ }
+
+ if (j == -1)
+ return NULL;
+
+ return &rs[j];
+}
+
+void
+connectrooms(struct room *a, struct room *b)
+{
+ size_t i, j;
+ ssize_t ii;
+ size_t x1, y1;
+ size_t x2, y2;
+
+ nearestpoints(a, b, &x1, &y1, &x2, &y2);
+
+ if (y1 > y2) {
+ ii = -1;
+ } else if (y2 > y1) {
+ ii = 1;
+ } else {
+ ii = 0;
+ }
+
+/*
+printf("%lu\t%lu\t%d\n", y1, y2, ii);
+*/
+ for (i = y1; i != y2; i += ii)
+ map[i][x1].c = '.';
+
+ if (x1 > x2) {
+ ii = -1;
+ } else if (x2 > x1) {
+ ii = 1;
+ } else {
+ ii = 0;
+ }
+
+ for (i = x1; i != x2; i += ii)
+ map[y2][i].c = '.';
+}
+
+void
+rendermap(void)
+{
+ size_t i, j;
+
+ for (i = 0; i < MAPHEIGHT; i++) {
+ for (j = 0; j < MAPWIDTH; j++)
+ putchar(map[i][j].c);
+ putchar('\n');
+ }
+}
+
+size_t
+diff(size_t a, size_t b)
+{
+ if (a < b)
+ return b - a;
+ return a - b;
+}
+
+/*
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Recursive
+*/
+size_t
+levenshteindistance_slow(char *a, char *b)
+{
+ size_t k, l, m;
+
+ if (!*a)
+ return strlen(b);
+ else if (!*b)
+ return strlen(a);
+ else if (*a == *b)
+ return levenshteindistance_slow(a + 1, b + 1);
+
+ k = levenshteindistance_slow(a + 1, b);
+ l = levenshteindistance_slow(a, b + 1);
+ m = levenshteindistance_slow(a + 1, b + 1);
+
+ if (k < l && k < m)
+ return 1 + k;
+ else if (l < k && l < m)
+ return 1 + l;
+ else
+ return 1 + m;
+}
+
+ssize_t
+placeitems_kmedoid(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k)
+{
+ size_t i, j;
+ ssize_t r;
+ double *distances, d;
+ double *mdists;
+ struct {
+ double selector;
+ double segment;
+ double index;
+ } *distanceparts, maxdistanceparts;
+ uint32_t prng;
+ struct point *points;
+
+ distanceparts = xmalloc(triangularnumber(l) * sizeof(*distanceparts));
+
+ memset(&maxdistanceparts, 0, sizeof(maxdistanceparts));
+ for (i = 0; i < l; i++) {
+ memset(&distanceparts[distanceindex(i, i)], 0, sizeof(*distanceparts));
+ for (j = 0; j < i; j++) {
+ distanceparts[distanceindex(i, j)].selector = levenshteindistance(g[i].fields[1], g[j].fields[1]) + levenshteindistance(g[i].fields[2], g[j].fields[2]) + levenshteindistance(g[i].fields[3], g[j].fields[3]);
+ distanceparts[distanceindex(i, j)].segment = diff(g[i].s, g[j].s);
+ distanceparts[distanceindex(i, j)].index = diff(g[i].i, g[j].i);
+
+ if (distanceparts[distanceindex(i, j)].selector > maxdistanceparts.selector)
+ maxdistanceparts.selector = distanceparts[distanceindex(i, j)].selector;
+ if (distanceparts[distanceindex(i, j)].segment > maxdistanceparts.segment)
+ maxdistanceparts.segment = distanceparts[distanceindex(i, j)].segment;
+ if (distanceparts[distanceindex(i, j)].index > maxdistanceparts.index)
+ maxdistanceparts.index = distanceparts[distanceindex(i, j)].index;
+ }
+ }
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < i; j++) {
+ distanceparts[distanceindex(i, j)].selector /= maxdistanceparts.selector;
+ distanceparts[distanceindex(i, j)].segment /= maxdistanceparts.segment;
+ distanceparts[distanceindex(i, j)].index /= maxdistanceparts.index;
+ }
+ }
+
+ distances = xmalloc(triangularnumber(l) * sizeof(*distances));
+ prng = fnv1a(4, host, port, selector, "wiggle");
+ for (i = 0; i < l; i++) {
+ distances[distanceindex(i, i)] = 0;
+ for (j = 0; j < i; j++)
+ distances[distanceindex(i, j)] = + distanceparts[distanceindex(i, j)].selector;
+ }
+ free(distanceparts);
+
+ r = kmedoids(g, l, k, distances, assocs, fnv1a(4, host, port, selector, "seed"));
+
+ points = calloc(l, sizeof(*points));
+ sammon(distances, l, fnv1a(4, host, port, selector, "sammonseed"), points);
+
+ for (i = 0; i < l; i++)
+ printf("%lf\t%lf\t%ld\t%lu\t%s\n", points[i].dims[0], points[i].dims[1], assocs[i] & 127, assocs[i] >> 7, g[i].fields[1]);
+
+ free(points);
+ free(distances);
+
+ return r;
+}
+
+size_t
+placeitems_hash(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k)
+{
+ size_t i;
+
+ for (i = 0; i < l; i++)
+ assocs[i] = fnv1a(6, host, port, selector, g[i].fields[1], g[i].fields[2], g[i].fields[3]) % k;
+
+ return k;
+}
+
+size_t py, px;
+
+enum {
+ Portal,
+ StaircaseDown,
+ Bookshelf
+};
+
+void
+generatemap(char *host, char *port, char *selector, struct gopherentry *g, size_t l)
+{
+ size_t i, j, k;
+ size_t x, y;
+ ssize_t n, m;
+ size_t *cassocs;
+ struct room *rooms, *r;
+ struct {
+ unsigned char x, y;
+ } positions[3];
+ uint32_t prng;
+ char buffer[3];
+
+ memset(map, 0, sizeof(map));
+
+ for (i = 0; i < MAPHEIGHT; i++) {
+ for (j = 0; j < MAPWIDTH; j++) {
+ map[i][j].c = '#';
+ }
+ }
+ k = 7;
+ rooms = calloc(k, sizeof(*rooms));
+ if (!rooms)
+ return;
+ k = generaterooms_gnarf(host, port, selector, rooms, k);
+
+ cassocs = calloc(l, sizeof(*cassocs));
+ if (!cassocs)
+ goto cleanup;
+
+ k = placeitems_kmedoid(host, port, selector, g, l, cassocs, k);
+
+ /* Insert rooms */
+ for (i = 0; i < k; i++) {
+ for (y = rooms[i].y; y < rooms[i].y + rooms[i].h; y++) {
+ for (x = rooms[i].x; x < rooms[i].x + rooms[i].w; x++)
+ map[y][x].c = '.';
+ }
+ }
+
+ /* Insert connections */
+ for (i = 0; i < k; i++) {
+ if (rooms[i].p)
+ connectrooms(&rooms[i], rooms[i].p);
+/*
+ if ((r = nearestroom(rooms, i, &rooms[i], fnv1a(4, host, port, selector, "gnarf"))) && r != rooms[i].p && r->p != &rooms[i])
+ connectrooms(&rooms[i], r);
+*/
+ }
+
+ /* Insert items */
+ for (i = 0; i < k; i++) {
+ snprintf(buffer, sizeof(buffer), "%d", i);
+ prng = fnv1a(4, host, port, selector, buffer);
+ for (j = 0; j < 3; j++) {
+ /* Ugly brute force strategy... I'm sure there is a math-way of doing this. */
+ do {
+ positions[j].x = rooms[i].x + xorshift(&prng) % rooms[i].w;
+ positions[j].y = rooms[i].y + xorshift(&prng) % rooms[i].h;
+ for (m = 0; m < j && positions[m].x != positions[j].x ||
+ positions[m].y != positions[j].y; m++);
+ } while (m < j);
+ }
+ for (j = 0; j < l; j++) {
+ if (cassocs[j] != i)
+ continue;
+/*
+ printf("%s\t%lu\n", g[j].fields[1], cassocs[j]);
+*/
+ switch (g[j].type) {
+ case '0':
+ x = positions[Bookshelf].x;
+ y = positions[Bookshelf].y;
+ map[y][x].c = 'E';
+ break;
+ case '1':
+ if (strcmp(g[j].fields[2], host) || strcmp(g[j].fields[3], port)) {
+ x = positions[Portal].x;
+ y = positions[Portal].y;
+ map[y][x].c = 'O';
+ } else {
+ x = positions[StaircaseDown].x;
+ y = positions[StaircaseDown].y;
+ if (map[y][x].data)
+ map[y][x].c = 'L';
+ else
+ map[y][x].c = '>';
+ }
+ break;
+ }
+ g[j].next = map[y][x].data;
+ map[y][x].data = &g[j];
+ }
+ }
+
+ free(cassocs);
+cleanup:
+ free(rooms);
+}
+
+char gopherbuffer[16 * 1024 * 1024];
+
+void
+interact(void)
+{
+}
+
+void
+loop(void)
+{
+ char c;
+
+ do {
+ switch (c) {
+ case 'h':
+ break;
+ case 'j':
+ break;
+ case 'k':
+ break;
+ case 'l':
+ break;
+ case ' ':
+ interact();
+ break;
+ }
+ } while (1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ ssize_t l;
+ struct gopherentry *g;
+ char *host, *port, *selector;
+ struct termios termios, ntermios;
+
+ if (argc < 2) {
+ printf("%s host [port [selector]]\n", argv[0]);
+ return 1;
+ }
+
+/*
+ memset(&termios, 0, sizeof(termios));
+ if (tcgetattr(0, &termios) == -1)
+ return 1;
+
+ memcpy(&ntermios, &termios, sizeof(termios));
+ ntermios.c_lflag &= ~(ICANON | ECHO);
+ ntermios.c_cc[VMIN] = 1;
+ ntermios.c_cc[VTIME] = 0;
+
+ if (tcsetattr(0, TCSANOW, &ntermios) == -1)
+ return 1;
+*/
+
+ host = argv[1];
+ port = (argc > 2) ? argv[2] : "70";
+ selector = (argc > 3) ? argv[3] : "";
+
+ do {
+ l = gopher_request(host, port, selector, gopherbuffer, sizeof(gopherbuffer));
+ if (l == -1)
+ return 1;
+
+ l = gopher_parsemenu(gopherbuffer, &g);
+ if (l == -1)
+ return 1;
+
+ l = gopher_removeentries(g, l);
+
+ generatemap(host, port, selector, g, l);
+/*
+ rendermap();
+*/
+
+/*
+ loop();
+*/
+ } while (0);
+
+/*
+ if (tcsetattr(0, TCSANOW, &termios) == -1)
+ return 1;
+*/
+
+ return 0;
+}
(DIR) diff --git a/gopher/gopher-clustering/gopher-clustering2.c b/gopher/gopher-clustering/gopher-clustering2.c
@@ -0,0 +1,1165 @@
+#define _POSIX_C_SOURCE 200112L
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <math.h>
+
+#include <netdb.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+uint32_t
+fnv1a(int n,...)
+{
+ int i;
+ char *s;
+ va_list l;
+ uint32_t h;
+
+ h = 0x811c9dc5;
+
+ va_start(l, n);
+ for (i = 0; i < n; i++) {
+ for (s = va_arg(l, char*); *s; s++) {
+ h ^= *s;
+ h *= 0x01000193;
+ }
+ }
+ va_end(l);
+
+ return h;
+}
+
+uint32_t
+xorshift(uint32_t *s)
+{
+ *s ^= *s << 13;
+ *s ^= *s >> 17;
+ *s ^= *s << 5;
+ return *s;
+}
+
+/*
+ https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785
+*/
+uint32_t
+mix(uint32_t x)
+{
+ x ^= x >> 16;
+ x *= 0x21f0aaad;
+ x ^= x >> 15;
+ x *= 0x735a2d97;
+ x ^= x >> 16;
+ return x;
+}
+
+struct gopherentry {
+ struct gopherentry *next;
+ char type;
+ char *fields[4];
+ size_t i, s;
+};
+
+char *
+gopher_fieldend(char *s)
+{
+ for (; *s; s++) {
+ if (*s == '\t')
+ break;
+ if (*s == '\r' && *(s+1) == '\n')
+ break;
+ if (*s == '\n')
+ break;
+ }
+ return s;
+}
+
+char *
+gopher_nextentry(char *s)
+{
+ char *c;
+
+ if (c = strchr(s, '\n'))
+ return c + 1;
+ return NULL;
+}
+
+void *
+xmalloc(size_t s)
+{
+ void *p;
+
+ if (!(p = malloc(s))) {
+ perror(NULL);
+ exit(1);
+ }
+ return p;
+}
+
+void *
+xrealloc(void *p, size_t s)
+{
+ if (!(p = realloc(p, s))) {
+ perror(NULL);
+ exit(1);
+ }
+ return p;
+}
+
+/* Allow a lot of ugly things... */
+size_t
+gopher_parsemenu(char *d, struct gopherentry **g)
+{
+ ssize_t gl;
+ size_t fl, i, s;
+ char *c, *p, pt;
+ struct gopherentry *cg;
+
+ gl = 0;
+ cg = NULL;
+
+ s = 0;
+ pt = 0;
+ for (c = d; c && *c; c = gopher_nextentry(c)) {
+ if (*c == '.')
+ break;
+ if (*c == '\n')
+ continue;
+
+ gl++;
+ cg = xrealloc(cg, gl * sizeof(struct gopherentry));
+ memset(&cg[gl-1], 0, sizeof(struct gopherentry));
+
+ if (*c != 'i' && pt == 'i')
+ s++;
+ pt = *c;
+
+ cg[gl-1].type = *c;
+ cg[gl-1].i = gl-1;
+ cg[gl-1].s = s;
+ c++;
+
+ for (i = 0; i < 4; i++) {
+ p = gopher_fieldend(c);
+ fl = p - c;
+ cg[gl-1].fields[i] = xmalloc(fl + 1);
+ memcpy(cg[gl-1].fields[i], c, fl);
+ cg[gl-1].fields[i][fl] = '\0';
+ if (*p != '\t')
+ break;
+ c = p + 1;
+ }
+ }
+ s++;
+ *g = cg;
+ return gl;
+}
+
+ssize_t
+gopher_removeentries(struct gopherentry *g, size_t l)
+{
+ size_t i, j;
+
+ for (i = 0; i < l;) {
+ if (g[i].type == 'i' || g[i].type == '3' || !g[i].fields[2] || !g[i].fields[3]) {
+ l--;
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g));
+ continue;
+ }
+ for (j = 0; j < i; j++) {
+ if (!strcmp(g[i].fields[1], g[j].fields[1]) &&
+ !strcmp(g[i].fields[2], g[j].fields[2]) &&
+ !strcmp(g[i].fields[3], g[j].fields[3]))
+ break;
+ }
+ if (j < i) {
+ l--;
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g));
+ continue;
+ }
+ i++;
+ }
+
+ return l;
+}
+
+ssize_t
+gopher_request(char *host, char *port, char *selector, char *buffer, size_t l)
+{
+ int fd;
+ struct addrinfo hints;
+ struct addrinfo *r, *rp;
+ ssize_t n, result, rn;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+
+ if (getaddrinfo(host, port, &hints, &r) != 0)
+ return -1;
+
+ for (rp = r; rp; rp = rp->ai_next) {
+ if ((fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol)) == -1)
+ continue;
+ if (connect(fd, rp->ai_addr, rp->ai_addrlen) != -1)
+ break;
+ close(fd);
+ }
+ freeaddrinfo(r);
+ if (!rp)
+ return -1;
+
+ result = -1;
+ if (write(fd, selector, strlen(selector)) != strlen(selector))
+ goto cleanup;
+
+ if (write(fd, "\r\n", 2) != 2)
+ goto cleanup;
+
+ n = 0;
+ while (1) {
+ if (n + 512 > l - 1)
+ goto cleanup;
+ if ((rn = read(fd, &buffer[n], 512)) == -1)
+ goto cleanup;
+ else if (!rn)
+ break;
+ n += rn;
+ }
+
+ buffer[n] = '\0';
+ result = n;
+
+cleanup:
+ close(fd);
+
+ return result;
+}
+
+size_t
+triangularnumber(size_t n)
+{
+ return (n * n + n) / 2;
+}
+
+size_t
+distanceindex(size_t i, size_t j)
+{
+ size_t t;
+
+ if (i < j) {
+ t = i;
+ i = j;
+ j = t;
+ }
+
+ return triangularnumber(i) + j;
+}
+
+struct point {
+ double dims[2];
+};
+
+#define SAMMON_MAPPING_MAX_ITER (1 << 13)
+#define SAMMON_MAPPING_LR (0.01)
+void
+sammon(double *distances, size_t l, uint32_t prng, struct point *points)
+{
+ size_t i, j, k, t, tmax;
+ double *gnarf;
+ double c, lr;
+ double e1, e2, sum, lasterror;
+ struct point *newpoints;
+
+ gnarf = calloc(triangularnumber(l), sizeof(*gnarf));
+ newpoints = calloc(l, sizeof(*newpoints));
+
+ for (i = 0; i < l; i++) {
+ points[i].dims[0] = (double)xorshift(&prng) / UINT32_MAX - 0.5;
+ points[i].dims[1] = (double)xorshift(&prng) / UINT32_MAX - 0.5;
+ }
+
+ c = 0;
+ for (i = 0; i < l; i++) {
+ gnarf[distanceindex(i, i)] = 0;
+ for (j = 0; j < i; j++) {
+ gnarf[distanceindex(i, j)] = sqrt(pow(points[i].dims[0] - points[j].dims[0], 2) + pow(points[i].dims[1] - points[j].dims[1], 2));
+ c += distances[distanceindex(i, j)];
+ }
+ }
+
+ lasterror = 0;
+ for (t = 0, tmax = SAMMON_MAPPING_MAX_ITER; t < tmax; t++) {
+ lr = SAMMON_MAPPING_LR;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < 2; j++) {
+ e1 = -2. / c;
+ sum = 0;
+ for (k = 0; k < l; k++) {
+ if (i == k)
+ continue;
+ sum += (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * (points[i].dims[j] - points[k].dims[j]);
+ }
+ e1 *= sum;
+
+ e2 = -2. / c;
+ sum = 0;
+ for (k = 0; k < l; k++) {
+ if (i == k)
+ continue;
+ sum += 1. / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * ((distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) - pow(points[i].dims[j] - points[k].dims[j], 2) / gnarf[distanceindex(i, k)] * (1 + (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / gnarf[distanceindex(i, k)]));
+ }
+ e2 *= sum;
+
+ newpoints[i].dims[j] = points[i].dims[j] - lr * (e1 / fabs(e2));
+ }
+ }
+
+ sum = 0;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < i; j++) {
+ gnarf[distanceindex(i, j)] = sqrt(pow(newpoints[i].dims[0] - newpoints[j].dims[0], 2) + pow(newpoints[i].dims[1] - newpoints[j].dims[1], 2));
+ sum += pow(distances[distanceindex(i, j)] - gnarf[distanceindex(i, j)], 2) / distances[distanceindex(i, j)];
+ }
+ }
+
+ fprintf(stderr, "sammon mapping error: %lf\n", sum / c);
+ if (t > 0 && fabs(sum / c - lasterror) < 0.000000000001)
+ break;
+ lasterror = sum / c;
+
+ memcpy(points, newpoints, l * sizeof(*points));
+ }
+
+ free(newpoints);
+ free(gnarf);
+}
+
+int
+fastermsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
+{
+ size_t i, j, n, ck, grimmel;
+ ssize_t m1, m, m2;
+ size_t ti, th, huh;
+ double dsum, d, d2, gnarf, tdsum;
+ struct {
+ size_t medoid;
+ size_t n;
+ double dsum;
+ } *clusters;
+ struct {
+ size_t n1, n2, n3;
+ double d1, d2, d3;
+ } *cache;
+ double *dS, *dS2;
+ double cdS, cdS2;
+ size_t cm, cx;
+ ssize_t lx;
+ int flag, result;
+ uint32_t r;
+ double *C;
+
+ if (!l)
+ return 0;
+
+ if (l < k)
+ k = l;
+
+ clusters = calloc(k, sizeof(*clusters));
+ if (!clusters)
+ return -1;
+
+ result = -1;
+
+/*
+ Seed the medoids randomly
+*/
+ for (i = n = 0; i < l; i++) {
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
+ clusters[n++].medoid = i;
+ if (n == k)
+ break;
+ }
+
+ cache = calloc(l, sizeof(*cache));
+ if (!cache)
+ goto cleanup;
+
+ dS = calloc(k, sizeof(*dS));
+ if (!dS)
+ goto cleanup;
+
+ dS2 = calloc(k, sizeof(*dS2));
+ if (!dS2)
+ goto cleanup;
+
+ for (i = 0; i < l; i++) {
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
+ for (j = 0; j < k; j++) {
+ d = distances[distanceindex(i, clusters[j].medoid)];
+ if (d < cache[i].d1) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = cache[i].d1;
+ cache[i].d1 = d;
+ cache[i].n3 = cache[i].n2;
+ cache[i].n2 = cache[i].n1;
+ cache[i].n1 = j;
+ } else if (d < cache[i].d2) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = d;
+ cache[i].n3 = cache[i].n2;
+ cache[i].n2 = j;
+ } else if (d < cache[i].d3) {
+ cache[i].d3 = d;
+ cache[i].n3 = j;
+ }
+ }
+ }
+
+ for (i = 0; i < k; i++) {
+ dS[i] = 0;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
+ }
+ for (j = 0; j < l; j++) {
+ if (cache[j].n2 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
+ }
+ }
+
+ lx = -1;
+ for (grimmel = 0;grimmel < 32;grimmel++) {
+ cdS = 0;
+ for (i = 0; i < l; i++) {
+ if (i == lx)
+ goto fastermsc_finished;
+
+ for (j = 0; j < k; j++) {
+ if (clusters[j].medoid == i)
+ goto fastmsc_nexti;
+ }
+
+ memcpy(dS2, dS, k * sizeof(*dS2));
+ cdS2 = 0;
+ for (j = 0; j < l; j++) {
+ d = distances[distanceindex(i, j)];
+ if (d < cache[j].d1) {
+ cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
+ dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d2) {
+ cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
+ dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d3) {
+ dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d;
+ }
+ }
+ d = -INFINITY;
+ for (j = 0; j < k; j++) {
+ if (dS2[j] > d) {
+ d = dS2[j];
+ m = j;
+ }
+ }
+ dS2[m] += cdS2;
+ if (dS2[m] <= 0)
+ continue;
+ lx = i;
+ clusters[m].medoid = i;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m)
+ continue;
+ cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY;
+ for (n = 0; n < k; n++) {
+ d = distances[distanceindex(j, clusters[n].medoid)];
+ if (d < cache[j].d1) {
+ cache[j].d3 = cache[j].d2;
+ cache[j].d2 = cache[j].d1;
+ cache[j].d1 = d;
+ cache[j].n3 = cache[j].n2;
+ cache[j].n2 = cache[j].n1;
+ cache[j].n1 = n;
+ } else if (d < cache[j].d2) {
+ cache[j].d3 = cache[j].d2;
+ cache[j].d2 = d;
+ cache[j].n3 = cache[j].n2;
+ cache[j].n2 = n;
+ } else if (d < cache[j].d3) {
+ cache[j].d3 = d;
+ cache[j].n3 = n;
+ }
+ }
+ }
+ for (j = 0; j < k; j++) {
+ dS[j] = 0;
+ for (n = 0; n < l; n++) {
+ if (cache[n].n1 != j)
+ continue;
+ dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3;
+ }
+ for (n = 0; n < l; n++) {
+ if (cache[n].n2 != j)
+ continue;
+ dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3;
+ }
+ }
+fastmsc_nexti:;
+ }
+/*
+ if (lx == -1)
+ break;
+*/
+ }
+fastermsc_finished:;
+
+ for (i = 0; i < k; i++)
+ clusters[i].n = 0;
+
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ assoc[i] = j;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ clusters[assoc[i]].n++;
+ }
+
+ for (i = 0; i < l; i++) {
+ if (clusters[assoc[i]].medoid == i)
+ assoc[i] |= 128;
+ }
+
+ result = k;
+cleanup:
+ free(clusters);
+ return result;
+}
+
+int
+fastmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
+{
+ size_t i, j, n, ck;
+ ssize_t m1, m, m2;
+ size_t ti, th;
+ double dsum, d, d2, gnarf, tdsum;
+ struct {
+ size_t medoid;
+ size_t n;
+ double dsum;
+ } *clusters;
+ struct {
+ size_t n1, n2;
+ double d1, d2, d3;
+ } *cache;
+ double *dS, *dS2;
+ double cdS, cdS2;
+ double _e, _y, _z, _s;
+ size_t cm, cx;
+ int flag, result;
+ uint32_t r;
+ double *C;
+
+ if (!l)
+ return 0;
+
+ if (l < k)
+ k = l;
+
+ clusters = calloc(k, sizeof(*clusters));
+ if (!clusters)
+ return -1;
+
+ result = -1;
+
+/*
+ seed the medoids using PAM BUILD
+ d = INFINITY;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += distances[distanceindex(i, j)];
+ if (dsum < d) {
+ m = i;
+ d = dsum;
+ }
+ }
+ if (m == -1)
+ goto cleanup;
+ ck = 0;
+ clusters[ck].medoid = m;
+ ck++;
+
+ C = xmalloc(l * l * sizeof(*C));
+
+ for (; ck < k; ck++) {
+ memset(C, 0, l * l * sizeof(*C));
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti;
+ }
+ for (j = 0; j < l; j++) {
+ if (i == j)
+ continue;
+ for (n = 0; n < ck; n++) {
+ if (clusters[n].medoid == j)
+ goto build_nextj;
+ }
+ d = INFINITY;
+ for (n = 0; n < ck; n++) {
+ if (distances[distanceindex(j, clusters[n].medoid)] < d)
+ d = distances[distanceindex(j, clusters[n].medoid)];
+ }
+ C[j * l + i] = d - distances[distanceindex(j, i)];
+ if (C[j * l + i] < 0)
+ C[j * l + i] = 0;
+build_nextj:;
+ }
+build_nexti:;
+ }
+ d = -1;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti2;
+ }
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += C[j * l + i];
+ if (dsum > d) {
+ d = dsum;
+ m = i;
+ }
+build_nexti2:;
+ }
+ clusters[ck].medoid = m;
+ }
+ free(C);
+*/
+
+/*
+ Seed the medoids randomly
+*/
+ for (i = n = 0; i < l; i++) {
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n)
+ clusters[n++].medoid = i;
+ if (n == k)
+ break;
+ }
+
+ cache = calloc(l, sizeof(*cache));
+ if (!cache)
+ goto cleanup;
+
+ dS = calloc(k, sizeof(*dS));
+ if (!dS)
+ goto cleanup;
+
+ dS2 = calloc(k, sizeof(*dS2));
+ if (!dS2)
+ goto cleanup;
+
+ for (;;) {
+ for (i = 0; i < l; i++) {
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY;
+ for (j = 0; j < k; j++) {
+ d = distances[distanceindex(i, clusters[j].medoid)];
+ if (d < cache[i].d1) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = cache[i].d1;
+ cache[i].d1 = d;
+ cache[i].n2 = cache[i].n1;
+ cache[i].n1 = j;
+ } else if (d < cache[i].d2) {
+ cache[i].d3 = cache[i].d2;
+ cache[i].d2 = d;
+ cache[i].n2 = j;
+ } else if (d < cache[i].d3) {
+ cache[i].d3 = d;
+ }
+ }
+ }
+
+ for (i = 0; i < k; i++) {
+ dS[i] = 0;
+ for (j = 0; j < l; j++) {
+ if (cache[j].n1 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3;
+ }
+ for (j = 0; j < l; j++) {
+ if (cache[j].n2 != i)
+ continue;
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3;
+ }
+ }
+
+ cdS = 0;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < k; j++) {
+ if (clusters[j].medoid == i)
+ goto fastmsc_nexti;
+ }
+ memcpy(dS2, dS, k * sizeof(*dS2));
+ cdS2 = _e = 0;
+ for (j = 0; j < l; j++) {
+ d = distances[distanceindex(i, j)];
+ if (d < cache[j].d1) {
+ cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1;
+ dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d2) {
+ cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d;
+ dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2;
+ } else if (d < cache[j].d3) {
+ dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d;
+ dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d;
+ }
+ }
+ d = -INFINITY;
+ d2 = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (dS2[j] > d) {
+ d = dS2[j];
+ m = j;
+ }
+ if (dS2[j] < d2) {
+ d2 = dS2[j];
+ m2 = j;
+ }
+ }
+ dS2[m] += cdS2;
+ if (dS2[m] > cdS) {
+ cdS = dS2[m];
+ cm = m;
+ cx = i;
+ }
+fastmsc_nexti:;
+ }
+ /* Should be 0. I guess some floating point problems? */
+ if (cdS <= 0.00000000000001)
+ break;
+ clusters[cm].medoid = cx;
+ }
+
+ for (i = 0; i < k; i++)
+ clusters[i].n = 0;
+
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ assoc[i] = j;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ clusters[assoc[i]].n++;
+ }
+
+ for (i = 0; i < l; i++) {
+ if (clusters[assoc[i]].medoid == i)
+ assoc[i] |= 128;
+ }
+
+ result = k;
+cleanup:
+ free(clusters);
+ return result;
+}
+
+/*
+ Seeding of medoids based on the PAM BUILD description in https://www.cs.umb.edu/cs738/pam1.pdf
+ I was too stupid to understand other descriptions.
+*/
+int
+kmedoids(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng)
+{
+ size_t i, j, n, ck;
+ ssize_t m1, m;
+ size_t ti, th;
+ double dsum, d, d2, gnarf, tdsum;
+ struct {
+ size_t medoid;
+ size_t n;
+ double dsum;
+ } *clusters;
+ int flag, result;
+ uint32_t r;
+ double *C;
+
+ if (!l)
+ return 0;
+
+ if (l < k)
+ k = l;
+
+ clusters = calloc(k, sizeof(*clusters));
+ if (!clusters)
+ return -1;
+
+ result = -1;
+
+ /* seed the medoids using PAM BUILD */
+ d = INFINITY;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += distances[distanceindex(i, j)];
+ if (dsum < d) {
+ m = i;
+ d = dsum;
+ }
+ }
+ if (m == -1)
+ goto cleanup;
+ ck = 0;
+ clusters[ck].medoid = m;
+ ck++;
+
+ C = xmalloc(l * l * sizeof(*C));
+
+ for (; ck < k; ck++) {
+ memset(C, 0, l * l * sizeof(*C));
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti;
+ }
+ for (j = 0; j < l; j++) {
+ if (i == j)
+ continue;
+ for (n = 0; n < ck; n++) {
+ if (clusters[n].medoid == j)
+ goto build_nextj;
+ }
+ d = INFINITY;
+ for (n = 0; n < ck; n++) {
+ if (distances[distanceindex(j, clusters[n].medoid)] < d)
+ d = distances[distanceindex(j, clusters[n].medoid)];
+ }
+ C[j * l + i] = d - distances[distanceindex(j, i)];
+ if (C[j * l + i] < 0)
+ C[j * l + i] = 0;
+build_nextj:;
+ }
+build_nexti:;
+ }
+ d = -1;
+ m = -1;
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < ck; j++) {
+ if (clusters[j].medoid == i)
+ goto build_nexti2;
+ }
+ dsum = 0;
+ for (j = 0; j < l; j++)
+ dsum += C[j * l + i];
+ if (dsum > d) {
+ d = dsum;
+ m = i;
+ }
+build_nexti2:;
+ }
+ clusters[ck].medoid = m;
+ }
+ free(C);
+
+ for (;;) {
+ tdsum = INFINITY;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < l; j++) {
+ for (n = 0; n < k; n++) {
+ if (j == clusters[n].medoid)
+ goto swap_nextj;
+ }
+ dsum = 0;
+ for (n = 0; n < l; n++) {
+ if (n == j)
+ continue;
+ for (m = 0; m < k; m++) {
+ if (clusters[m].medoid == n)
+ goto swap_nextn;
+ }
+ d = INFINITY;
+ d2 = INFINITY;
+ for (m = 0; m < k; m++) {
+ if (distances[distanceindex(clusters[m].medoid, n)] < d) {
+ d2 = d;
+ d = distances[distanceindex(clusters[m].medoid, n)];
+ } else if (distances[distanceindex(clusters[m].medoid, n)] < d2) {
+ d2 = distances[distanceindex(clusters[m].medoid, n)];
+ }
+ }
+ if (distances[distanceindex(clusters[i].medoid, n)] > d) {
+ if (distances[distanceindex(j, n)] >= d)
+ gnarf = 0;
+ else
+ gnarf = distances[distanceindex(j, n)] - d;
+ } else if (distances[distanceindex(clusters[i].medoid, n)] == d) {
+ if (distances[distanceindex(j, n)] < d2)
+ gnarf = distances[distanceindex(j, n)] - d;
+ else
+ gnarf = d2 - d;
+ } else {
+ continue;
+ }
+ dsum += gnarf;
+swap_nextn:;
+ }
+ if (dsum < tdsum) {
+ ti = i;
+ th = j;
+ tdsum = dsum;
+ }
+swap_nextj:;
+ }
+ }
+ if (tdsum >= 0)
+ break;
+ clusters[ti].medoid = th;
+ }
+
+ for (i = 0; i < k; i++)
+ clusters[i].n = 0;
+
+ for (i = 0; i < l; i++) {
+ d = INFINITY;
+ for (j = 0; j < k; j++) {
+ if (distances[distanceindex(clusters[j].medoid, i)] < d) {
+ assoc[i] = j;
+ d = distances[distanceindex(clusters[j].medoid, i)];
+ }
+ }
+ clusters[assoc[i]].n++;
+ }
+
+ for (i = 0; i < l; i++) {
+ if (clusters[assoc[i]].medoid == i)
+ assoc[i] |= 128;
+ }
+
+ result = k;
+cleanup:
+ free(clusters);
+ return result;
+}
+
+size_t
+diff(size_t a, size_t b)
+{
+ if (a < b)
+ return b - a;
+ return a - b;
+}
+
+/*
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
+ Which references https://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm-2
+ The code there is using the CPOL: https://www.codeproject.com/info/cpol10.aspx
+*/
+size_t
+levenshteindistance(char *a, char *b)
+{
+ size_t i, j, dc, ic, sc, r;
+ size_t *v0, *v1, *t;
+
+ v0 = xmalloc((strlen(b) + 1) * sizeof(*v0));
+ v1 = xmalloc((strlen(b) + 1) * sizeof(*v1));
+
+ for (i = 0; i <= strlen(b); i++)
+ v0[i] = i;
+
+ for (i = 0; i < strlen(a); i++) {
+ v1[0] = i + 1;
+ for (j = 0; j < strlen(b); j++) {
+ dc = v0[j + 1] + 1;
+ ic = v1[j] + 1;
+ if (a[i] == b[j])
+ sc = v0[j];
+ else
+ sc = v0[j] + 1;
+ if (dc < ic && dc < sc)
+ v1[j + 1] = dc;
+ else if (ic < dc && ic < sc)
+ v1[j + 1] = ic;
+ else
+ v1[j + 1] = sc;
+ }
+ t = v0;
+ v0 = v1;
+ v1 = t;
+ }
+
+ r = v0[strlen(b)];
+
+ free(v1);
+ free(v0);
+
+ return r;
+}
+
+/*
+ Remove common prefix and suffix. The pazz0distance is the sum of the length of the leftover strings.
+*/
+size_t
+pazz0distance(char *a, char *b)
+{
+ size_t pl, sl;
+ size_t al, bl;
+
+ al = strlen(a);
+ bl = strlen(b);
+
+ for (pl = 0; pl < al && pl < bl && a[pl] == b[pl]; pl++);
+ for (sl = 0; al - sl > pl && bl - sl > pl && a[al - 1 - sl] == b[bl - 1 - sl]; sl++);
+
+ return al + bl - 2 * (pl + sl);
+}
+
+/*
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Recursive
+*/
+size_t
+levenshteindistance_slow(char *a, char *b)
+{
+ size_t k, l, m;
+
+ if (!*a)
+ return strlen(b);
+ else if (!*b)
+ return strlen(a);
+ else if (*a == *b)
+ return levenshteindistance_slow(a + 1, b + 1);
+
+ k = levenshteindistance_slow(a + 1, b);
+ l = levenshteindistance_slow(a, b + 1);
+ m = levenshteindistance_slow(a + 1, b + 1);
+
+ if (k < l && k < m)
+ return 1 + k;
+ else if (l < k && l < m)
+ return 1 + l;
+ else
+ return 1 + m;
+}
+
+ssize_t
+clustering(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k)
+{
+ size_t i, j;
+ ssize_t r;
+ double *distances, d;
+ double *mdists;
+ struct {
+ double selector;
+ double segment;
+ double index;
+ } *distanceparts, maxdistanceparts;
+ uint32_t prng;
+ struct point *points;
+
+ distanceparts = xmalloc(triangularnumber(l) * sizeof(*distanceparts));
+
+ /* So the sammon mapping can be happy. */
+ prng = fnv1a(4, host, port, selector, "distancewiggle");
+ memset(&maxdistanceparts, 0, sizeof(maxdistanceparts));
+ for (i = 0; i < l; i++) {
+ memset(&distanceparts[distanceindex(i, i)], 0, sizeof(*distanceparts));
+ for (j = 0; j < i; j++) {
+ distanceparts[distanceindex(i, j)].selector = levenshteindistance(g[i].fields[1], g[j].fields[1]) + levenshteindistance(g[i].fields[2], g[j].fields[2]) + levenshteindistance(g[i].fields[3], g[j].fields[3]) + ((double)xorshift(&prng) / UINT32_MAX) * 0.000000000000000000000000000000000000001;
+ distanceparts[distanceindex(i, j)].segment = diff(g[i].s, g[j].s);
+ distanceparts[distanceindex(i, j)].index = diff(g[i].i, g[j].i);
+
+ if (distanceparts[distanceindex(i, j)].selector > maxdistanceparts.selector)
+ maxdistanceparts.selector = distanceparts[distanceindex(i, j)].selector;
+ if (distanceparts[distanceindex(i, j)].segment > maxdistanceparts.segment)
+ maxdistanceparts.segment = distanceparts[distanceindex(i, j)].segment;
+ if (distanceparts[distanceindex(i, j)].index > maxdistanceparts.index)
+ maxdistanceparts.index = distanceparts[distanceindex(i, j)].index;
+ }
+ }
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < i; j++) {
+ distanceparts[distanceindex(i, j)].selector /= maxdistanceparts.selector;
+ distanceparts[distanceindex(i, j)].segment /= maxdistanceparts.segment;
+ distanceparts[distanceindex(i, j)].index /= maxdistanceparts.index;
+ }
+ }
+
+ distances = xmalloc(triangularnumber(l) * sizeof(*distances));
+ for (i = 0; i < l; i++) {
+ distances[distanceindex(i, i)] = 0;
+ for (j = 0; j < i; j++)
+ distances[distanceindex(i, j)] = 0.75 * distanceparts[distanceindex(i, j)].selector + 0.125 * distanceparts[distanceindex(i, j)].segment + 0.125 * distanceparts[distanceindex(i, j)].index; //distanceparts[distanceindex(i, j)].selector;
+ }
+ free(distanceparts);
+
+ r = kmedoids(g, l, k, distances, assocs, fnv1a(4, host, port, selector, "seed"));
+
+ points = calloc(l, sizeof(*points));
+ sammon(distances, l, fnv1a(4, host, port, selector, "sammonseed"), points);
+
+ for (i = 0; i < l; i++)
+ printf("%lf\t%lf\t%ld\t%lu\t%s\n", points[i].dims[0], points[i].dims[1], assocs[i] & 127, assocs[i] >> 7, g[i].fields[1]);
+
+ free(points);
+ free(distances);
+
+ return r;
+}
+
+char gopherbuffer[16 * 1024 * 1024];
+
+int
+main(int argc, char *argv[])
+{
+ ssize_t l;
+ size_t k;
+ struct gopherentry *g;
+ char *host, *port, *selector;
+ size_t *cassocs;
+
+ if (argc < 2) {
+ printf("%s host [port [selector]]\n", argv[0]);
+ return 1;
+ }
+
+ host = argv[1];
+ port = (argc > 2) ? argv[2] : "70";
+ selector = (argc > 3) ? argv[3] : "";
+
+ l = gopher_request(host, port, selector, gopherbuffer, sizeof(gopherbuffer));
+ if (l == -1)
+ return 1;
+
+ l = gopher_parsemenu(gopherbuffer, &g);
+ if (l == -1)
+ return 1;
+
+ l = gopher_removeentries(g, l);
+
+ cassocs = calloc(l, sizeof(*cassocs));
+ if (!cassocs)
+ return 1;
+ k = 5;
+ clustering(host, port, selector, g, l, cassocs, k);
+
+ return 0;
+}
(DIR) diff --git a/gopher/gopher-clustering/hint.md b/gopher/gopher-clustering/hint.md
@@ -0,0 +1,9 @@
+
+14:28:09 pazz0 | `egcc -Ofast -o gopher-clustering
+gopher-clustering.c -lm`
+14:28:19 pazz0 | `./gopher-clustering bitreich.org 70 /index.gph
+| sed '1,/^Silh/d' > clustering-output`
+14:28:34 pazz0 | And in gnuplot: `plot 'clustering-output' using
+1:2:5:(4+$4):3 with labels point pt var lc
+ | var`
+