/*
 *  csv - preprocess *.csv files
 *
 *  AUTHOR: Thomas Spahni (thomas@spahni.ch)
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include <stdio.h>
#include <stdlib.h>

/*
 * Files in *.csv format can be a pain to process with sed when they
 * contain '\n' ( <newline> or <line-feed> ) characters within quoted 
 * data fields.
 *
 * This filter will go through the file and determine if a <newline> is
 * part of the data or indicates the end of a record. Any <newline>
 * found as data is converted to '\r' or <carriage-return> and existing
 * carriage-returns are deleted. At the same time this is a conversion
 * from DOS to unix style.
 *
 * Functions performed:
 *  - field separating semicolons are replaced by <tab>
 *  - field delimiting semicolons are deleted
 *  - newlines within quoted data fields become '\r'
 *  - preexisting carriage returns are deleted
 *
 */


int
main (int argc, char *argv[])
{

    int c;

   /*
    * quoted_data is a switch to indicate whether we are in a quoted
    * data field or not. 0 = normal outside, 1 = within data field
    */
 
    int quoted_data = 0;

   /*
    * hanging_quote is a switch to remember if we have already 
    * seen the first one of a pair of double quotes.
    */
 
    int hanging_quote = 0;


    while ((c = getchar()) != EOF) {
	if ( c == '"' ) {
	    /*
	     * quoted_data status does not matter if it is a quote;
	     * this quote could be the beginning or the end of a
	     * data field or it is the beginning of a quote ""
	     */
	    if ( hanging_quote ) {
		/* this is definitely a complete quote now */
		hanging_quote = 0;
		putchar(c);
	    } else {
		hanging_quote = 1;   /* wait to see what follows */
	    }
	} else {
	    /* not a quote character */
	    if ( quoted_data ) {
		/* we are within a quoted data field */
		if ( hanging_quote ) {
		    /* it's a single quote terminating quoted data */
		    quoted_data   = 0;
		    hanging_quote = 0;
		    if ( c == ';' ) {
			putchar('\t');
		    } else if ( c != '\r' ) {
			putchar(c);
		    }
		} else {
		    /* within quoted data, special rules apply */
		    if ( c == '\n' ) {
			putchar('\r');
		    } else if ( c != '\r' ) {
			putchar(c);
		    }
		}
	    } else {
		if ( hanging_quote ) {
		    /* it's a single quote starting quoted data */
		    quoted_data   = 1;
		    hanging_quote = 0;
		}
		if ( c == ';' ) {
		    putchar('\t');
		} else if ( c != '\r' ) {
		    putchar(c);
		}
	    }
	}
    } /* wend */
    fflush(stdout);
    exit(EXIT_SUCCESS);
}
