/* Copyright ⓒ Rafał Maszkowski 2019, 2020, GNU GPL3+ * program to compare sound files ASE extracts or to search one in another */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MIN3(a, b, c) ( MIN( (a), MIN( (b), (c) ) ) ) /* not evaluation safe */ #define MAX3(a, b, c) ( MAX( (a), MAX( (b), (c) ) ) ) #define SEC 100 /* steps per second */ #define GUARDIAN 10*60*SEC /* 10 min margin */ #define REAL float /* for linreg */ #define MODE_CMP 1 #define MODE_SEARCH 2 #define MODE_DIST 3 #define PRE_PARTS 2 #define PRE_MINSIG 8 static int verbose_opt = 0, mode = MODE_SEARCH, hourbeg_flag = 1, swing_opt = 5*SEC /* cmp with +-5 s shift */, swingmul_opt = 6, beg_opt = 0, end_opt = MAXINT /* s*SEC */, prestep_opt = 10, step_opt = 1; static float detlim_opt = 0.05 /* sy/swing */; struct stats { float av, min, max, sigma; int tmax, faults, shifts; }; double timed() { struct timespec now; clock_gettime(CLOCK_REALTIME, &now); return now.tv_sec + now.tv_nsec*1e-9; } int startt(char *name) { char* pos = strrchr(name, '.'); if (!pos) return 0; if (pos-name < 4) return 0; if (!isdigit(pos[-1]) || !isdigit(pos[-2]) || !isdigit(pos[-3]) || !isdigit(pos[-4])) return 0; return 600*(pos[-4]-48) + 60*(pos[-3]-48) + 10*(pos[-2]-48) + (pos[-1]-48); } /* arrays based on https://www.eskimo.com/~scs/cclass/int/sx9b.html */ int readase(char *path, int *rows, int *cols, float ***data) { #define ROWSBEG (360000) #define ROWSINC (60000) int row, rowsalloc = 0, rowsallocp = 0; ssize_t num; FILE *file; file = fopen(path, "r"); if (file == NULL) { dprintf(STDERR_FILENO, "%d:ERROR %d (%s) at fopen(%s, \"r\"), exiting…\n", __LINE__, errno, strerror(errno), path); exit(20); } num = fread(cols, sizeof(*cols), 1, file); if (num < 1) { dprintf(STDERR_FILENO, "%d:ERROR while fread() from %s%s, exiting…\n", __LINE__, path, feof(file)?": END OF FILE":""); exit(21); } *rows = 0; rowsalloc = ROWSBEG; *data = malloc(rowsalloc * sizeof(**data)); if (!*data) { dprintf(STDERR_FILENO, "%d:Cannot malloc(%ld) to *data, exiting…\n", __LINE__, rowsalloc * sizeof(**data)); exit(22); } for (row = rowsallocp; row < rowsalloc; row++) { (*data)[row] = malloc(*cols * sizeof(***data)); if (!(*data)[row]) { dprintf(STDERR_FILENO, "%d:Cannot malloc(%ld) to (*data)[%d], exiting…\n", __LINE__, *cols * sizeof(***data), row); exit(23); } } rowsallocp = rowsalloc; while ( num = fread((*data)[*rows], sizeof(***data), *cols, file) ) { if (num < *cols) { dprintf(STDERR_FILENO, "%d:ERROR while fread() from %s%s, exiting…\n", __LINE__, path, feof(file)?": END OF FILE":""); exit(24); } (*rows)++; if ( (*rows) >= rowsalloc) { rowsalloc += ROWSINC; *data = realloc(*data, rowsalloc * sizeof(**data)); if (!*data) { dprintf(STDERR_FILENO, "%d:Cannot realloc(%ld) to *data, exiting…\n", __LINE__, rowsalloc * sizeof(**data)); exit(25); } for (row = rowsallocp; row < rowsalloc; row++) { (*data)[row] = malloc(*cols * sizeof(***data)); if (!(*data)[row]) { dprintf(STDERR_FILENO, "%d:Cannot malloc(%ld) to (*data)[%d], exiting…\n", __LINE__, *cols * sizeof(***data), row); exit(26); } } rowsallocp = rowsalloc; } } if (fclose(file)) { dprintf(STDERR_FILENO, "%d:ERROR %d (%s) at fclose() of %s, exiting…\n", __LINE__, errno, strerror(errno), path); exit(27); } *data = realloc(*data, ((*rows)+GUARDIAN) * sizeof(**data)); if (!*data) { dprintf(STDERR_FILENO, "%d:Cannot realloc(%ld) to *data, exiting…\n", __LINE__, ((*rows)+GUARDIAN) * sizeof(**data)); exit(21); } memset(*data + *rows, 0, GUARDIAN * sizeof(**data)); return 0; } float asecmp(float **data1, float **data2, int rows, int cols, int *faults) { int row, col; float sum = 0, diff; for (row = 0; row <= rows; row++) { if (data1[row] != NULL && data2[row] != NULL) { for (col = 0; col < cols; col++) { diff = data1[row][col] - data2[row][col]; sum += diff * diff; } } else { (*faults)++; } } return sqrtf(sum/rows/cols); } int aseshift(float **data1, int rows1, float **data2, int rows2, int cols, int step, struct stats *asestats) { /* assuming rows1 <= rows2 */ int shift, shifts, tmax = -1, faults = 0; float val, valp, diff, sdiff = 0, sdiff2 = 0, min = HUGE_VALF, max = -1; shifts = rows2 - rows1 + 1; valp = asecmp(data1, data2 + 0, rows1, cols, &faults); /* printf("faults on 1st ret.: %d\n", faults); */ for (shift = 1; shift < shifts; shift += step) { val = asecmp(data1, data2 + shift, rows1, cols, &faults); /* printf("faults on ret.: %d @shift %d\n", faults, shift); */ diff = fabsf(val - valp); sdiff += diff; sdiff2 += diff * diff; if (diff > max) { max = diff; tmax = shift; } if (diff < min) min = diff; valp = val; /* printf("%.8f %.8f\n", val, diff); */ } asestats->av = sdiff/(shifts/step); asestats->min = min; asestats->max = max; asestats->tmax = tmax; asestats->sigma = sqrtf( sdiff2/(shifts/step) - (sdiff/(shifts/step)) * (sdiff/(shifts/step)) ); asestats->shifts = (shifts/step); asestats->faults = faults; return tmax; } int linreg(int n, const REAL x[], const REAL y[], REAL* a, REAL* b, REAL* r, REAL *s, REAL* avy) { /* https://stackoverflow.com/questions/5083465/fast-efficient-least-squares-fit-algorithm-in-c */ REAL sumx = 0.0, sumx2 = 0.0, sumxy = 0.0, sumy = 0.0, sumy2 = 0.0; for (int i = 0; i < n; i++) { sumx += x[i]; sumx2 += x[i]*x[i]; sumxy += x[i] * y[i]; sumy += y[i]; sumy2 += y[i]*y[i]; } REAL denom = (n * sumx2 - sumx*sumx); if (verbose_opt >= 3) printf("sumx: %f sumx2: %f sumxy: %f sumy: %f sumy2: %f denom: %f\n", sumx, sumx2, sumxy, sumy, sumy2, denom); if (denom == 0) { *a = 0; *b = 0; if (r) *r = 0; return 1; } /* singular matrix. can't solve the problem. */ *a = (n * sumxy - sumx * sumy) / denom; *b = (sumy * sumx2 - sumx * sumxy) / denom; if (r != NULL) { *r = (sumxy - sumx*sumy/n) / sqrtf((sumx2 - sumx*sumx/n) * (sumy2 - sumy*sumy/n)); /* compute correlation coeff */ if (s != NULL) *s = sqrtf(sumy2 / n - sumy*sumy /n/n) * sqrtf(1 - *r * *r); /* http://davidmlane.com/hyperstat/A120567.html */ } else if (s != NULL) *s = sqrtf(sumy2 / n - sumy*sumy /n/n); /* just sy if r omitted */ *avy = sumy / n; return 0; } void usage(char *name) { printf("Usage: %s [-c] [-s] [-h] [-v] FILE1 FILE2\n", name); printf("-c, --cmp compare two files\n"); printf("-s, --search search for small FILE1 in large FILE2\n"); printf("-f, --filebeg the result of search from start of FILE2 instead of from begin of FILE2 hour\n"); printf("-m, --margin MARGIN compare within MARGIN s, default: %d s\n", swing_opt/SEC); printf("-M, --mult FACTOR in 1st fast cmp use MARGIN*FACTOR s, default FACTOR: %d\n", swingmul_opt); printf("-L, --detlim LEVEL fit if sy/max_s(swing) > LEVEL, default: %g\n", detlim_opt); printf("-b, --beg BEGIN compare starting BEGIN s from beginning of the hour\n"); printf("-e, --end END compare ending END s from beginning of the hour\n"); printf("-P, --prestep STEP use every STEP row in preliminary part, default every %dth\n", prestep_opt); printf("-S, --step STEP use every STEP row in the main loop, default every %dst\n", step_opt); printf("-h, --help this help\n"); printf("-v, --verbose more verbosity\n"); } int main(int argc, char *argv[]) { char *name1, *name2, *ind; int err, rows1, cols1, rows2, cols2, cols, startt1, startt2, optchar, fit = 0; float **data1, **data2, hour = 0; double clbeg = -666, clrdbeg = -666, clrdend = -666, clend = -666; /* guard us */ clbeg = timed(); /* unconditionally cos verbose_opt not read yet */ while (1) { static struct option long_options[] = { { "beg", required_argument, 0, 'b' }, { "end", required_argument, 0, 'e' }, { "margin", required_argument, 0, 'm' }, { "mult", required_argument, 0, 'M' }, { "detlim", required_argument, 0, 'L' }, { "prestep", required_argument, 0, 'P' }, { "step", required_argument, 0, 'S' }, { "cmp", no_argument, 0, 'c' }, { "search", no_argument, 0, 's' }, /* default */ { "filebeg", no_argument, 0, 'f' }, { "help", no_argument, 0, 'h' }, { "verbose", no_argument, 0, 'v' }, { NULL, 0, NULL, 0 } }; int option_index = 0; optchar = getopt_long (argc, argv, "b:e:m:M:LP:S:csfhhv", long_options, &option_index); if (optchar == -1) break; /* Detect the end of the options. */ switch(optchar) { case 'h': usage(argv[0]); break; case 'b': beg_opt = (int)(atof(optarg)*SEC); break; case 'e': end_opt = (int)(atof(optarg)*SEC); break; case 'c': mode = MODE_CMP; break; case 'f': hourbeg_flag = 0; break; case 's': mode = MODE_SEARCH; break; case 'm': swing_opt = abs(atoi(optarg))*SEC; break; case 'M': swingmul_opt = abs(atoi(optarg)); break; case 'L': detlim_opt = abs(atof(optarg)); break; case 'P': prestep_opt = abs(atoi(optarg)); break; case 'S': step_opt = abs(atoi(optarg)); break; case 'v': verbose_opt++; break; default: usage(argv[0]); exit(0); } } if (optind + 2 != argc) { dprintf(STDERR_FILENO, "%d:Two file arguments needed, exiting…\n", __LINE__); exit(10); } name1 = argv[optind]; startt1 = startt(name1) * SEC; name2 = argv[optind+1]; startt2 = startt(name2) * SEC; if (verbose_opt >= 1) clrdbeg = timed(); err = readase(name1, &rows1, &cols1, &data1); if (err) { dprintf(STDERR_FILENO, "%d:Cannot open(%s), exiting…\n", __LINE__, name1); exit(11); } err = readase(name2, &rows2, &cols2, &data2); if (err) { dprintf(STDERR_FILENO, "%d:Cannot open(%s), exiting…\n", __LINE__, name2); exit(12); } if (verbose_opt >= 1) clrdend = timed(); if (cols1 != cols2) dprintf(STDERR_FILENO, "%d:cols1: %d, cols2: %d\n", __LINE__, cols1, cols2); cols = cols1; if (cols2 < cols1) cols = cols2; if ( (ind = strchr(name2, '-')) && (strlen(ind) >= 10) ) hour = 10 * (ind[9]-'0') + ind[10]-'0'; /* tvp3po-2018120102… */ if (verbose_opt >= 2) printf("FILES %s %d + %.3f s * %d %s %d + %.3f s * %d\n", name1, startt1/SEC, (float)rows1/SEC, cols1, name2, startt2/SEC, (float)rows2/SEC, cols2); if(mode == MODE_SEARCH) { int tmax; struct stats asestats; float tb, thb; tmax = aseshift(data1, rows1, data2, rows2, cols, prestep_opt, &asestats); tb = ((float)tmax)/SEC; thb = tb + startt2/SEC; if (verbose_opt >= 2) printf("tmax: %5d tb: %.3f s (%02.0f:%02.0f) thb: %.3f (%02.0f:%02.0f) ", tmax, tb, tb/60, tb-(int)(tb/60)*60, thb, thb/60, thb-(int)(thb/60)*60); if (verbose_opt >= 2) printf("av: %.10f min/max: %.10f/%.10f sigma: %.10f max/av: %f (max-av)/sig: %f faults/sh: %f\n", asestats.av, asestats.min, asestats.max, asestats.sigma, asestats.max/asestats.av, (asestats.max-asestats.av)/asestats.sigma, (float)asestats.faults/asestats.shifts); /* cmp now: divide into few parts and fit each separately in few times bigger window */ int parts = 4; REAL timesx[parts], timesy[parts], a, b, r, s, avy; for(int i = 0; i <= parts; i++) { timesx[i] = rows1 * i / parts; timesy[i] = aseshift(data1 + (int)(timesx[i]), rows1/parts, data2 + tmax - rows1, 2 * rows1, cols, 1, &asestats); if (verbose_opt >= 3) printf("%f %f av: %.10f min/max: %.10f/%.10f sigma: %.10f max/av: %f (max-av)/sig: %f faults/sh: %f\n", timesx[i]/SEC, timesy[i]/SEC, asestats.av, asestats.min, asestats.max, asestats.sigma, asestats.max/asestats.av, (asestats.max-asestats.av)/asestats.sigma, (float)asestats.faults/asestats.shifts); if (verbose_opt >= 1) printf("%6.3f %7.3f %6.2f %6.2f %s %s %s\n", hour + timesx[i]/SEC/3600, timesy[i]/SEC, (asestats.max-asestats.av)/asestats.sigma, (float)asestats.faults/asestats.shifts, name1, name2, "PART t,del,(max-av)/sig,flt/sh,n1,n2"); } int err = linreg(parts, timesx, timesy, &a, &b, &r, &s, &avy); if (verbose_opt >= 1) { if (err) printf("ERROR: singular matrix in linreg()\n"); clend = timed(); } if (verbose_opt >= 2) printf("a: %f b: %f r: %f s: %f\n", a, b, r, s); if (r > 0.90) { /* WRONG! USE s INSTEAD */ if (verbose_opt >= 1) { if (hourbeg_flag) printf("%f\n", thb); /* begin of name1 is thb s from the begin name2 hour */ } else { printf("%f\n", tb); } /* begin of name1 is thb s from the begin of name2 */ return 0; /* success */ } return 1; /* not fit */ } /*max(beg1-5, beg2, beglim) min(end1+5, end2, endlim); divide remaining data1 into 10 parts, calculate middle points; swing each part against data2 middle-5…middle+5; fit a straight line; qualify acc. to r (default or given as an option)*/ if(mode == MODE_CMP) { int beg, end, len, i = 0, parts = 10; struct stats asestats; REAL timesx[parts], timesy[parts], del0, a, b, s, avy, srel; int swing0_eff = -swingmul_opt * swing_opt, swing1_eff = swingmul_opt * swing_opt; beg = MAX3(0, startt2 - startt1 - swing0_eff, beg_opt - startt1); /* beg==0 at data1 fragment start */ end = MIN3(rows1, startt2 + rows2 - startt1 - swing1_eff, end_opt - startt1); len = (end-beg) / parts * PRE_PARTS; /* arbitrary length of PRE_PARTS parts */ timesx[i] = startt1 + beg + i*len + len/2; /* middle of data1 chunk */ if (len < 300 && verbose_opt >= -1) printf("%6.3f %s %s WARNING: beg %d end %d len %d (beg>end or len<300)!\n", hour + timesx[i]/SEC/3600, name1, name2, beg, end, len); del0 = timesy[i] = aseshift(data1 + beg + i*len, len, data2 + startt1 - startt2 + beg + i*len + swing0_eff, len + (swing1_eff-swing0_eff), cols, prestep_opt, &asestats) + swing0_eff; /* from middle position */ if (verbose_opt >= 3) printf("%f %f av: %.10f min/max: %.10f/%.10f sigma: %.10f max/av: %f (max-av)/sig: %f faults/sh: %f\n", timesx[i]/SEC, timesy[i]/SEC, asestats.av, asestats.min, asestats.max, asestats.sigma, asestats.max/asestats.av, (asestats.max-asestats.av)/asestats.sigma, (float)asestats.faults/asestats.shifts); if (verbose_opt >= 2) printf("%6.3f %7.3f %6.2f %6.2f %s %s %s\n", hour + timesx[i]/SEC/3600, timesy[i]/SEC, (asestats.max-asestats.av)/asestats.sigma, (float)asestats.faults/asestats.shifts, name1, name2, "PRE t,del,(max-av)/sig,flt/sh,n1,n2"); swing0_eff = -swing_opt; swing1_eff = swing_opt; /* no broadened range for the partial comparisons! */ if ( (asestats.max-asestats.av)/asestats.sigma > PRE_MINSIG ) { swing0_eff = (int)(timesy[i]) - swing_opt; swing1_eff = (int)(timesy[i]) + swing_opt; } /* but maybe adjusted */ /* data1 data2 limits */ beg = MAX3(0, startt2 - startt1 - swing0_eff, beg_opt - startt1); /* beg==0 at data1 fragment start */ end = MIN3(rows1, startt2 + rows2 - startt1 - swing1_eff, end_opt - startt1); len = (end-beg) / parts; /* may be slightly rounded down, never mind… */ for (i = 0; i < parts; i++) { timesx[i] = startt1 + beg + i*len + len/2; /* middle of data1 chunk */ timesy[i] = aseshift(data1 + beg + i*len, len, data2 + startt1 - startt2 + beg + i*len + swing0_eff, len + (swing1_eff-swing0_eff), cols, step_opt, &asestats) - (swing1_eff-swing0_eff)/2; /* from middle position */ if (verbose_opt >= 3) printf("%f %f av: %.10f min/max: %.10f/%.10f sigma: %.10f max/av: %f (max-av)/sig: %f faults/sh: %f\n", timesx[i]/SEC, timesy[i]/SEC, asestats.av, asestats.min, asestats.max, asestats.sigma, asestats.max/asestats.av, (asestats.max-asestats.av)/asestats.sigma, (float)asestats.faults/asestats.shifts); if (verbose_opt >= 2) printf("%6.3f %7.3f %6.2f %6.2f %s %s %s\n", hour + timesx[i]/SEC/3600, timesy[i]/SEC, (asestats.max-asestats.av)/asestats.sigma, (float)asestats.faults/asestats.shifts, name1, name2, "PART t,del,(max-av)/sig,flt/sh,n1,n2"); } int err = linreg(parts, timesx, timesy, &a, &b, NULL, &s, &avy); srel = s/(swing1_eff-swing0_eff); /* std. dev. relative to maximum */ fit = srel < detlim_opt; if (verbose_opt >= 1) { if (err) printf("ERROR: singular matrix in linreg()\n"); clend = timed(); printf("%6.3f %7.3f %f %s %s %s rd %.3f s tot %.3f s %s\n", hour + ((float)(startt1 + beg))/SEC/3600, (avy+del0)/SEC, srel, name1, name2, fit?"FIT":"FAIL", clrdend-clrdbeg, clend-clbeg, "FINAL t,del,s,n1,n2"); } if (fit) return 0; return 1; /* not fit */ } /* person recognition - based on ASE power distribution in bands? */ if (mode == MODE_DIST) { } return 1; } /* matchase - make full swing possible correlate - including for(time) loop scenarios: - large/confirmation: 5-55/10 min - 20-60 s, 10-50/10 min - 20-60 s ?, whole in 5 min chunks +-5 s?, whole +-5 s? - simple search: cmp short sample with a long one, or in range ideas: - adaptive comparison: large swing for the first fragment, reduced ones later (increases performance) - asymetric swing, e.g. -15…-5 - -O3 problems: - quality assesment - performance - same sound and almost same image - local branch sign on in video? - how to reduce - replace file to symlink to the file left? */ .