/* * ease.c * * Extract Audio Spectrum Envelope * * This program extracts audio spectrum envelopes from monaural [-*-] sound files. {+*+} The extraction algorithm conforms to the [-*-] International Standard ISO/IEC {+*+} 15938-4:2002 (also known as [-*-] MPEG-7 Audio). * [-*-] When invoked with the -i switch, the resulting output would [-*-] no longer be {+*+} standard-compliant. Instead, it would be in an [-*-] intermediate format, where {+*+} the data consists of an int which [-*-] denotes the column size n, followed by a {+*+} mxn matrix in double [-*-] precision, with m frames and n bins in row major order * (n * i + j). * [-*-] This program assumes a high QoI of floating point [-*-] mathematics, and the {+*+} availability of the FFTW and libsndfile [-*-] libraries. * [-*-] Copyright (C) 2002-2003 Tak-Shing Chan * * This program is free software; you can redistribute it and/or [-*-] modify it {+*+} under the terms of the GNU General Public License [-*-] as published by the Free {+*+} Software Foundation; either version 2 [-*-] of the License, or (at your option) {+*+} any later version. * [-*-] $Log: ease.c,v $ * Revision 1.1.1.1 2003/12/18 21:25:31 chan12 * Initial import. * */ #include #include #include #include #include #include #include #include {+#include +} /* The famous constant */ #ifndef M_PI #define M_PI 3.14159265358979323846 #endif /* Look-up table for octaveResolution */ static struct { const char *name; double value; } LUT[] = { { "1/16", .0625 }, { "1/8", .125 }, { "1/4", .25 }, { "1/2", .5 }, { "1", 1 }, { "2", 2 }, { "4", 4 }, { "8", 8 } }; {+#define LOGVALFLOOR (-200)+} /* [-*-] Calculate the next power of two of x. This function assumes [-*-] a positive x smaller than INT_MAX / 2. */ static int npow2(double x) { int exp; if (frexp(x, &exp) == 0.5) exp--; return 1 << exp; } /* A Perl-like error handler */ static void die(const char *msg) { perror(msg); exit(EXIT_FAILURE); } /* [-*-] Extract the audio spectrum envelope of an input file to the [-*-] output file, {+*+} using the AudioSpectrumEnvelope D format. The [-*-] permuted argument order is a {+*+} workaround for an old gcc bug [-*-] on UltraSPARCs. */ static void ExtractAudioSpectrumEnvelope(double sr, double loEdge, double hiEdge, double *winsize, const char *hopSize, const char *octaveResolution, SNDFILE *ifp, FILE *ofp, int binary, sf_count_t [-frames)-] {+frames, int logres, int nohead)+} { int PTN, PTF, n, lw, NFFT, nEdges, m, hop; double r = 0, h, dy, DF, *edge, **band, y = [-0; double-] {+0,+} *w, *x, *X, [-*L;-] {+*L, outval;+} fftw_plan plan; size_t expected, actual = 0; sf_count_t framesread; [-/* Nyquist rate check */-] if (hiEdge > sr / 2) { fprintf(stderr, "hiEdge is beyond the Nyquist rate\n"); exit(EXIT_FAILURE); } /* [-Hopsize-] {+Nyquist rate+} check */ if (sscanf(hopSize, "PT%dN%dF", &PTN, &PTF) != 2) { fprintf(stderr, "hopSize is invalid\n"); exit(EXIT_FAILURE); } /* {+Hopsize check */ /*+} Set r to the numerical octaveResolution */ for (n = 0; n < sizeof LUT / sizeof *LUT; n++) { if (!strcmp(LUT[n].name, octaveResolution)) { r = LUT[n].value; break; } } if (n == sizeof LUT / sizeof *LUT) { fprintf(stderr, "Invalid octaveResolution\n"); exit(EXIT_FAILURE); } /* Calculate various parameters according to 1.1.2.3 */ dy = modf(sr * PTN / PTF, &h); /* For DDA */ lw = floor(sr * *winsize + 0.5); NFFT = npow2(lw); DF = sr / NFFT; nEdges = floor(log(hiEdge / loEdge) / log(2) / r + 2.5); /* Allocate dynamic arrays */ if (!(edge = calloc(nEdges, sizeof *edge)) || !(band = calloc(nEdges, sizeof *band)) || !(w = calloc(NFFT, sizeof *w)) || !(x = calloc(NFFT, sizeof *x)) || !(X = calloc(NFFT, sizeof *X)) || !(L = calloc(NFFT, sizeof *L))) die("calloc"); for (m = 0; m < nEdges; m++) if (!(band[m] = calloc(NFFT, sizeof **band))) die("calloc"); [-/* Calculate edge locations */-] for (m = 0; m < nEdges; m++) edge[m] = loEdge * pow(2, r * m) / DF; /* {+Calculate edge locations */ /*+} Determine below-band weightings (band[0]) */ for (n = 0; n < edge[0] - 0.5; n++) band[0][n] = 1; band[0][n] = 0.5 - n + edge[0]; while (++n < NFFT / 2 + 1) band[0][n] = 0; /* Determine in-band weightings (band[1...nEdges - 2]) */ for (m = 1; m < nEdges - 1; m++) { for (n = 0; n < edge[m - 1] - 0.5; n++) band[m][n] = 0; band[m][n] = n - edge[m - 1] + 0.5; while (++n < edge[m] - 0.5) band[m][n] = 1; band[m][n] = 0.5 - n + edge[m]; while (++n < NFFT / 2 + 1) band[m][n] = 0; } /* Determine above-band weightings (band[nEdges - 1]) */ for (n = 0; n < edge[m - 1] - 0.5; n++) band[m][n] = 0; band[m][n] = n - edge[m - 1] + 0.5; while (++n < NFFT / 2 + 1) band[m][n] = 1; [-/* Create Hamming window of length lw */-] for (n = 0; n < lw; n++) w[n] = 0.54 - 0.46 * cos(2 * M_PI * n / (lw - 1)); /* Create [-FFTW plan-] {+Hamming window of length lw+} */ plan = fftw_plan_r2r_1d(NFFT, x, X, FFTW_R2HC, FFTW_MEASURE); /* [-Read the first lw samples-] {+Create FFTW plan+} */ if (sf_read_double(ifp, x, lw) != lw) { fprintf(stderr, "File shorter than analysis window!\n"); exit(EXIT_FAILURE); } /* {+Read the first lw samples */ /*+} Output header */ expected = ceil(((frames - lw) / h) + 1); if (binary) { [-/* Output number of edges to file */-] if (fwrite(&nEdges, sizeof nEdges, 1, ofp) != 1) die("fwrite"); {+/* Output number of edges to file */+} } else {+if (!nohead)+} { fputs("\n", ofp); fputs("\n", ofp); fputs(" \n", octaveResolution); fprintf(ofp, " \n", nEdges); fprintf(ofp, " \n", (unsigned long) expected, nEdges); } /* Start processing */ do { [-/* Apply the Hamming window */-] for (n = 0; n < lw; n++) x[n] *= w[n]; /* [-Zero out-of-window samples-] {+Apply the Hamming window+} */ while (++n < NFFT) x[n] = 0; /* [-STFT-] {+Zero out-of-window samples+} */ fftw_execute(plan); /* {+STFT */ /*+} Magnitude squared */ X[0] *= X[0]; for (n = 1; n < NFFT / 2; n++) X[n] = X[n] * X[n] + X[NFFT - n] * X[NFFT - n]; X[n] *= X[n]; [-/* Convert to power spectrum coefficients */-] for (n = 0; n < NFFT / 2 + 1; n++) X[n] /= lw * NFFT; /* {+Convert to power spectrum coefficients */ /*+} Resample to log scale */ for (m = 0; m < nEdges; m++) L[m] = 0; for (m = 0; m < nEdges; m++) for (n = 0; n < NFFT / 2 + 1; n++) L[m] += X[n] * band[m][n]; /* Output log spectrum to file */ if {+(!logres) { if+} (binary) { if (fwrite(L, sizeof *L, nEdges, ofp) != nEdges) die("fwrite"); } else { fprintf(ofp, " "); for (m = 0; m < nEdges; m++) {+{ outval = L[m]; fprintf(ofp, " %.*g", DBL_DIG, outval); } fputc('\n', ofp); } } else { int ncol = 0; float norm[m], srow = 0; for (m = 0; m < nEdges; m++) { if (L[m] > 0) { norm[m] = 10 * log10(L[m]); srow += norm[m]; ncol++; } else { norm[m] = LOGVALFLOOR; } } for (m = 0; m < nEdges; m++) { if (srow != 0) norm[m] /= srow; else norm[m] = 0; } if (binary) { if (fwrite(norm, sizeof *norm, nEdges, ofp) != nEdges) die("fwrite"); } else { for (m = 0; m < nEdges; m++) {+} fprintf(ofp, " %.*g", DBL_DIG, [-L[m]);-] {+norm[m]); }+} fputc('\n', ofp); } {+}+} /* Find next frame */ hop = floor(h); /* Do not round! */ if ((y += dy) >= 1) { /* DDA line draw */ hop++; y--; } memmove(x, x + hop, (lw - hop) * sizeof *x); framesread = sf_read_double(ifp, x + lw - hop, hop); if (framesread < hop) for (n = lw - hop + framesread; n < lw; n++) x[n] = 0; actual++; } while (framesread); if (sf_error(ifp)) die("sf_read_double"); if (expected != actual) fprintf(stderr, "Warning: expected size %lu does not " "match actual size %lu!\n", (unsigned long) expected, (unsigned long) actual); /* Clean up */ if [-(!binary)-] {+(!binary && !nohead)+} { fputs(" \n", ofp); fputs(" \n", ofp); fputs(" \n", ofp); fputs("\n", ofp); } fftw_destroy_plan(plan); free(L); free(X); free(x); free(w); for (n = 0; n < nEdges; n++) free(band[n]); free(band); free(edge); } /* The main program */ int main(int argc, char *argv[]) { extern char *optarg; extern int optind; int c, flag = 0, binary = {+0, logres = 0, nohead =+} 0; const char *hopSize = "PT10N1000F"; double winsize = [-0.03; double-] {+0.03,+} loEdge = 62.5, hiEdge = 16000; const char *octaveResolution = "1/4"; SNDFILE *ifp; SF_INFO sfinfo; FILE *ofp; /* Get command options */ while ((c = getopt(argc, argv, [-"ih:w:l:u:o:"))-] {+"ih:w:l:u:o:LH"))+} != EOF) { switch (c) { case 'i': binary = 1; break; case 'h': hopSize = optarg; break; case 'w': if (sscanf(optarg, "%lf", &winsize) != 1) flag++; break; case 'l': if (sscanf(optarg, "%lf", &loEdge) != 1) flag++; break; case 'u': if (sscanf(optarg, "%lf", &hiEdge) != 1) flag++; break; case 'o': octaveResolution = optarg; break; case {+'L': logres = 1; break; case 'H': nohead = 1; break; case+} '?': flag++; } } /* Illegal options? */ if (flag || argc != optind + 2) { fprintf(stderr, "usage: %s [-i] {+[-L] [-H]+} [-h hopsize] [-" "[-w-] {+[-w+} winsize] [-l loEdge] [-u hiEdge] [-" "[-o-] {+[-o+} octaveResolution] infile outfile\n", argv[0]); {+fprintf(stderr, "-i binary output int cols#, then double type rows\n"); fprintf(stderr, "-L binary output int cols#, then float rows normed\n"); fprintf(stderr, "-H no header/tail for text output\n");+} return EXIT_FAILURE; } /* Open input and output files */ sfinfo.format = 0; if (!(ifp = sf_open(argv[optind], SFM_READ, &sfinfo))) die("sf_open"); if (sfinfo.channels != 1) { fprintf(stderr, "Sound file must be monaural!\n"); exit(EXIT_FAILURE); } if [-(binary) { if (!(ofp = fopen(argv[optind + 1], "wb"))) die("fopen"); } else { if (!(ofp-] {+( !(ofp+} = fopen(argv[optind + 1], [-"w")))-] {+binary?"wb":"w")) )+} die("fopen"); [-}-] /* Extract the audio spectrum envelope */ ExtractAudioSpectrumEnvelope(sfinfo.samplerate, loEdge, hiEdge, &winsize, hopSize, octaveResolution, ifp, ofp, binary, [-sfinfo.frames);-] {+sfinfo.frames, logres, nohead);+} /* Closing up */ fclose(ofp); sf_close(ifp); return EXIT_SUCCESS; } .