/*
  $Header: soundexmodule.c,v 1.3 95/05/03 14:58:23 dwwillia Exp $
  
  Perform soundex comparisons on strings.

  Soundex is an algorithm that hashes English strings into an
  alphanumeric value.  Strings that sound the same are hashed to
  the same value.  This allows for non-literal string matching.

*/

#include <string.h>
#include <ctype.h>
#include "Python.h"

/* RESULT_LENGTH is the number of significant characters in the hash string.
   A buffer that receives a hash must hold RESULT_LENGTH + 1 bytes so that
   there is room for the terminating '\0'.
 */
#define RESULT_LENGTH 6

/* Map one input character to its soundex digit.

   Returns '1'..'6' for the consonant classes below, or 0 for every
   character that is skipped (vowels, W, H, Y and anything that is not
   one of the listed letters).  The comparison is case-insensitive.

      BFPV     = 1
      CGJKQSXZ = 2
      DT       = 3
      L        = 4
      MN       = 5
      R        = 6
 */
static char soundex_code(char ch)
{
    /* The <ctype.h> functions require an argument representable as
       unsigned char; passing a negative plain char is undefined. */
    switch (toupper((unsigned char)ch))
    {
    case 'B':
    case 'F':
    case 'P':
    case 'V':
        return '1';

    case 'C':
    case 'G':
    case 'J':
    case 'K':
    case 'Q':
    case 'S':
    case 'X':
    case 'Z':
        return '2';

    case 'D':
    case 'T':
        return '3';

    case 'L':
        return '4';

    case 'M':
    case 'N':
        return '5';

    case 'R':
        return '6';

    case 'A':                   /* Skip vowels, W, H and Y ... */
    case 'E':
    case 'I':
    case 'O':
    case 'U':
    case 'Y':
    case 'W':
    case 'H':
    default:                    /* ... and everything else. */
        return 0;
    }
}

/* Compute the soundex hash of str and store it in result.

   str     NUL-terminated input string.  An empty string (or a NULL
           pointer) hashes to RESULT_LENGTH '0' characters.
   result  Output buffer of at least RESULT_LENGTH + 1 bytes.  On return
           it holds exactly RESULT_LENGTH characters followed by '\0'.

   The hash is built as follows:

   1) The first character of str is copied to result, converted to
      upper case.  It is copied as-is; its own soundex digit is not
      taken into account when suppressing repeats in step 3.

   2) Every following character is translated with soundex_code();
      skipped characters produce no output.

   3) A digit is only emitted if it differs from the character last
      written to result, so a series of characters with the same code
      (even when separated by skipped characters) yields one digit.

   4) The result is truncated to RESULT_LENGTH characters and padded to
      that length with '0's if necessary.
 */
void soundex_hash(char *str, char *result)
{
    char *sptr = str;                        /* pointer into str */
    char *rptr = result;                     /* pointer into result */
    char *rend = result + RESULT_LENGTH;     /* one past the last hash char */

    if (str != NULL && *str != '\0')
    {
        /* Preserve the first character of the input string. */
        *rptr++ = (char)toupper((unsigned char)*sptr++);

        /* Translate the rest until the hash is full or input runs out. */
        for (; rptr < rend && *sptr != '\0'; sptr++)
        {
            char code = soundex_code(*sptr);

            /* rptr - 1 is always valid here: the first character has
               already been written. */
            if (code != 0 && *(rptr - 1) != code)
                *rptr++ = code;
        }
    }

    /* Pad 0's on the right side out to RESULT_LENGTH characters.  For an
       empty or NULL input this fills the whole hash with '0's. */
    while (rptr < rend)
        *rptr++ = '0';

    /* Terminate the result string. */
    *rend = '\0';
}

/*
  Compare two strings by their soundex hash values.

  Expects args to hold two strings.  Returns a Python integer that is 1
  when both strings hash to the same value and 0 otherwise, or NULL when
  the arguments cannot be parsed.
 */
static PyObject *
sound_similar(PyObject *self, PyObject *args)
{
    char *first;
    char *second;
    char first_hash[RESULT_LENGTH + 1];
    char second_hash[RESULT_LENGTH + 1];
    int same;

    if (!PyArg_ParseTuple(args, "ss", &first, &second))
    {
        return NULL;
    }

    /* Hash both inputs; the strings are similar iff the hashes match. */
    soundex_hash(first, first_hash);
    soundex_hash(second, second_hash);
    same = (strcmp(first_hash, second_hash) == 0);

    return PyInt_FromLong(same);
}

/*
  Return the soundex hash value of a string.

  Expects args to hold a single string.  Returns a new Python string
  containing the hash produced by soundex_hash(), or NULL when the
  argument cannot be parsed.
 */
static PyObject *
soundex(PyObject *self, PyObject *args)
{
    char hash[RESULT_LENGTH + 1];   /* hash characters plus terminator */
    char *text;
    PyObject *value = NULL;

    if (PyArg_ParseTuple(args, "s", &text))
    {
        soundex_hash(text, hash);
        value = PyString_FromString(hash);
    }

    return value;
}


/* Python Method Table.
 */
static PyMethodDef SoundexMethods[] =
{
    {"soundex", soundex, 1},
    {"sound_similar", sound_similar, 1},
    {NULL, NULL }               /* sentinel */
};


/* Register the method table.

   Creates the "soundex" module with the functions listed in
   SoundexMethods.  By the Python extension-module naming convention
   (init<module name>), this is the entry point the interpreter is
   expected to call when the module is imported.  The module object
   returned by Py_InitModule() is deliberately discarded.
 */
void
initsoundex(void)
{
    (void) Py_InitModule("soundex", SoundexMethods);
}



