/*
 *  ease.c
 *
 *  Extract Audio Spectrum Envelope
 *
 *  This program extracts audio spectrum envelopes from monaural
 *  sound files.  The extraction algorithm conforms to the
 *  International Standard ISO/IEC 15938-4:2002 (also known as
 *  MPEG-7 Audio).
 *
 *  When invoked with the -i switch, the resulting output would
 *  no longer be standard-compliant.  Instead, it would be in an
 *  intermediate format, where the data consists of an int which
 *  denotes the column size n, followed by a mxn matrix in double
 *  precision, with m frames and n bins in row major order
 *  (n * i + j).
 *
 *  This program assumes a high QoI of floating point
 *  mathematics, and the availability of the FFTW and libsndfile
 *  libraries.
 *
 *  Copyright (C) 2002-2003 Tak-Shing Chan
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * $Log: ease.c,v $
 * Revision 1.1.1.1  2003/12/18 21:25:31  chan12
 * Initial import.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <fftw3.h>
#include <unistd.h>
#include <sndfile.h>

/* The famous constant */
#ifndef M_PI
#define M_PI		3.14159265358979323846
#endif

/* Look-up table for octaveResolution */
static struct {
    const char	*name;
    double	value;
} LUT[] = {
    { "1/16",	.0625	},
    { "1/8",	.125	},
    { "1/4",	.25	},
    { "1/2",	.5	},
    { "1",	1	},
    { "2",	2	},
    { "4",	4	},
    { "8",	8	}
};

/*
 * Calculate the next power of two of x.  This function assumes
 * a positive x smaller than INT_MAX / 2.
 */
static int
npow2(double x)
{
    int		exp;

    if (frexp(x, &exp) == 0.5)
	exp--;
    return 1 << exp;
}

/* A Perl-like error handler */
static void
die(const char *msg)
{
    perror(msg);
    exit(EXIT_FAILURE);
}

/*
 * Extract the audio spectrum envelope of an input file to the
 * output file, using the AudioSpectrumEnvelope D format.  The
 * permuted argument order is a workaround for an old gcc bug
 * on UltraSPARCs.
 */
static void
ExtractAudioSpectrumEnvelope(double sr,
			double loEdge,
			double hiEdge,
			double *winsize,
			const char *hopSize,
			const char *octaveResolution,
			SNDFILE *ifp,
			FILE *ofp,
			int binary,
			sf_count_t frames)
{
    int		PTN, PTF, n, lw, NFFT, nEdges, m, hop;
    double	r = 0, h, dy, DF, *edge, **band, y = 0;
    double	*w, *x, *X, *L;
    fftw_plan	plan;
    size_t	expected, actual = 0;
    sf_count_t	framesread;

    /* Nyquist rate check */
    if (hiEdge > sr / 2) {
	fprintf(stderr, "hiEdge is beyond the Nyquist rate\n");
	exit(EXIT_FAILURE);
    }

    /* Hopsize check */
    if (sscanf(hopSize, "PT%dN%dF", &PTN, &PTF) != 2) {
	fprintf(stderr, "hopSize is invalid\n");
	exit(EXIT_FAILURE);
    }

    /* Set r to the numerical octaveResolution */
    for (n = 0; n < sizeof LUT / sizeof *LUT; n++) {
        if (!strcmp(LUT[n].name, octaveResolution)) {
	    r = LUT[n].value;
            break;
        }
    }
    if (n == sizeof LUT / sizeof *LUT) {
        fprintf(stderr, "Invalid octaveResolution\n");
	exit(EXIT_FAILURE);
    }

    /* Calculate various parameters according to 1.1.2.3 */
    dy = modf(sr * PTN / PTF, &h);		/* For DDA */
    lw = floor(sr * *winsize + 0.5);
    NFFT = npow2(lw);
    DF = sr / NFFT;
    nEdges = floor(log(hiEdge / loEdge) / log(2) / r + 2.5);

    /* Allocate dynamic arrays */
    if (!(edge = calloc(nEdges, sizeof *edge))	||
	!(band = calloc(nEdges, sizeof *band))	||
	!(w = calloc(NFFT, sizeof *w))		||
	!(x = calloc(NFFT, sizeof *x))		||
	!(X = calloc(NFFT, sizeof *X))		||
	!(L = calloc(NFFT, sizeof *L)))
        die("calloc");
    for (m = 0; m < nEdges; m++)
        if (!(band[m] = calloc(NFFT, sizeof **band)))
            die("calloc");

    /* Calculate edge locations */
    for (m = 0; m < nEdges; m++)
        edge[m] = loEdge * pow(2, r * m) / DF;

    /* Determine below-band weightings (band[0]) */
    for (n = 0; n < edge[0] - 0.5; n++)
        band[0][n] = 1;
    band[0][n] = 0.5 - n + edge[0];
    while (++n < NFFT / 2 + 1)
        band[0][n] = 0;

    /* Determine in-band weightings (band[1...nEdges - 2]) */
    for (m = 1; m < nEdges - 1; m++) {
        for (n = 0; n < edge[m - 1] - 0.5; n++)
            band[m][n] = 0;
        band[m][n] = n - edge[m - 1] + 0.5;
        while (++n < edge[m] - 0.5)
            band[m][n] = 1;
        band[m][n] = 0.5 - n + edge[m];
        while (++n < NFFT / 2 + 1)
            band[m][n] = 0;
    }

    /* Determine above-band weightings (band[nEdges - 1]) */
    for (n = 0; n < edge[m - 1] - 0.5; n++)
        band[m][n] = 0;
    band[m][n] = n - edge[m - 1] + 0.5;
    while (++n < NFFT / 2 + 1)
        band[m][n] = 1;

    /* Create Hamming window of length lw */
    for (n = 0; n < lw; n++)
        w[n] = 0.54 - 0.46 * cos(2 * M_PI * n / (lw - 1));

    /* Create FFTW plan */
    plan = fftw_plan_r2r_1d(NFFT, x, X, FFTW_R2HC, FFTW_MEASURE);

    /* Read the first lw samples */
    if (sf_read_double(ifp, x, lw) != lw) {
	fprintf(stderr, "File shorter than analysis window!\n");
	exit(EXIT_FAILURE);
    }

    /* Output header */
    expected = ceil(((frames - lw) / h) + 1);
    if (binary) {
	/* Output number of edges to file */
	if (fwrite(&nEdges, sizeof nEdges, 1, ofp) != 1)
	    die("fwrite");
    } else {
	fputs("<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n", ofp);
	fputs("<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\"\n", ofp);
	fputs("       xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n", ofp);
	fputs("       xmlns:mpeg7=\"urn:mpeg:mpeg7:schema:2001\"\n", ofp);
	fputs("       xmlns:xml=\"http://www.w3.org/XML/1998/namespace\"\n", ofp);
	fputs("       xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 Mpeg7-2001.xsd\">\n", ofp);
	fputs("  <DescriptionUnit xsi:type=\"mpeg7:AudioSpectrumEnvelopeType\"\n", ofp);
	fprintf(ofp, "                   loEdge=\"%f\"\n", loEdge);
	fprintf(ofp, "                   hiEdge=\"%f\"\n", hiEdge);
	fprintf(ofp, "                   octaveResolution=\"%s\">\n", octaveResolution);
	fprintf(ofp, "    <SeriesOfVector hopSize=\"%s\"", hopSize);
	fprintf(ofp, " totalNumOfSamples=\"%lu\"\n", (unsigned long) expected);
	fprintf(ofp, "                    vectorSize=\"%d\">\n", nEdges);
	fprintf(ofp, "      <Raw mpeg7:dim=\"%lu %d\">\n", (unsigned long) expected, nEdges);
    }

    /* Start processing */
    do {
	/* Apply the Hamming window */
	for (n = 0; n < lw; n++)
	    x[n] *= w[n];

	/* Zero out-of-window samples */
	while (++n < NFFT)
	    x[n] = 0;

	/* STFT */
	fftw_execute(plan);

	/* Magnitude squared */
	X[0] *= X[0];
	for (n = 1; n < NFFT / 2; n++)
	    X[n] = X[n] * X[n] + X[NFFT - n] * X[NFFT - n];
	X[n] *= X[n];

	/* Convert to power spectrum coefficients */
	for (n = 0; n < NFFT / 2 + 1; n++)
	    X[n] /= lw * NFFT;

	/* Resample to log scale */
	for (m = 0; m < nEdges; m++)
	    L[m] = 0;
	for (m = 0; m < nEdges; m++)
	    for (n = 0; n < NFFT / 2 + 1; n++)
		L[m] += X[n] * band[m][n];

	/* Output log spectrum to file */
	if (binary) {
	    if (fwrite(L, sizeof *L, nEdges, ofp) != nEdges)
		die("fwrite");
	} else {
	    fprintf(ofp, "       ");
	    for (m = 0; m < nEdges; m++)
		fprintf(ofp, " %.*g", DBL_DIG, L[m]);
	    fputc('\n', ofp);
	}

	/* Find next frame */
	hop = floor(h);			/* Do not round! */
	if ((y += dy) >= 1) {		/* DDA line draw */
	    hop++;
	    y--;
	}
	memmove(x, x + hop, (lw - hop) * sizeof *x);
	framesread = sf_read_double(ifp, x + lw - hop, hop);
	if (framesread < hop)
	    for (n = lw - hop + framesread; n < lw; n++)
		x[n] = 0;
	actual++;
    } while (framesread);
    if (sf_error(ifp))
	die("sf_read_double");
    if (expected != actual)
	fprintf(stderr, "Warning: expected size %lu does not "
		"match actual size %lu!\n", (unsigned long)
		expected, (unsigned long) actual);

    /* Clean up */
    if (!binary) {
	fputs("      </Raw>\n", ofp);
	fputs("    </SeriesOfVector>\n", ofp);
	fputs("  </DescriptionUnit>\n", ofp);
	fputs("</Mpeg7>\n", ofp);
    }
    fftw_destroy_plan(plan);
    free(L);
    free(X);
    free(x);
    free(w);
    for (n = 0; n < nEdges; n++)
	free(band[n]);
    free(band);
    free(edge);
}

/* The main program */
int
main(int argc, char *argv[])
{
    extern char	*optarg;
    extern int	optind;
    int		c, flag = 0, binary = 0;
    const char	*hopSize = "PT10N1000F";
    double	winsize = 0.03;
    double	loEdge = 62.5, hiEdge = 16000;
    const char	*octaveResolution = "1/4";
    SNDFILE	*ifp;
    SF_INFO	sfinfo;
    FILE	*ofp;

    /* Get command options */
    while ((c = getopt(argc, argv, "ih:w:l:u:o:")) != EOF) {
	switch (c) {
	case 'i':
	    binary = 1;
	    break;
	case 'h':
	    hopSize = optarg;
	    break;
	case 'w':
	    if (sscanf(optarg, "%lf", &winsize) != 1)
		flag++;
	    break;
	case 'l':
	    if (sscanf(optarg, "%lf", &loEdge) != 1)
		flag++;
	    break;
	case 'u':
	    if (sscanf(optarg, "%lf", &hiEdge) != 1)
		flag++;
	    break;
	case 'o':
	    octaveResolution = optarg;
	    break;
	case '?':
	    flag++;
	}
    }

    /* Illegal options? */
    if (flag ||	argc != optind + 2) {
	fprintf(stderr, "usage: %s [-i] [-h hopsize] "
		"[-w winsize] [-l loEdge] [-u hiEdge] "
		"[-o octaveResolution] infile outfile\n",
		argv[0]);
	return EXIT_FAILURE;
    }

    /* Open input and output files */
    sfinfo.format = 0;
    if (!(ifp = sf_open(argv[optind], SFM_READ, &sfinfo)))
	die("sf_open");
    if (sfinfo.channels != 1) {
	fprintf(stderr, "Sound file must be monaural!\n");
	exit(EXIT_FAILURE);
    }
    if (binary) {
	if (!(ofp = fopen(argv[optind + 1], "wb")))
	    die("fopen");
    } else {
	if (!(ofp = fopen(argv[optind + 1], "w")))
	    die("fopen");
    }

    /* Extract the audio spectrum envelope */
    ExtractAudioSpectrumEnvelope(sfinfo.samplerate, loEdge,
		hiEdge, &winsize, hopSize, octaveResolution,
		ifp, ofp, binary, sfinfo.frames);

    /* Closing up */
    fclose(ofp);
    sf_close(ifp);
    return EXIT_SUCCESS;
}
