"""
Perform soundex comparisons on strings.

Soundex is an algorithm that hashes English strings into an
alphanumeric value.  Strings that sound the same are hashed to
the same value.  This allows for non-literal string matching.

$Header: soundex.py,v 1.2 95/05/03 14:58:00 dwwillia Exp $
"""
from string import upper
from regsub import gsub

# Length of every value returned by soundex(), leading letter included.
RESULT_LENGTH = 6

# Letters that are discarded everywhere except in the first position.
_SOUNDEX_SKIPPED = "WHAEIOUY"

# Letter -> digit table.  Vowels map to "0"; since vowels are only kept in
# the first position, "0" can only ever be produced for the leading letter.
_SOUNDEX_CODES = dict(zip("AEIOUYBFPVCGJKQSXZDTLMNR",
                          "000000111122222222334556"))

# Only these characters take part in the "collapse adjacent duplicates" step.
_SOUNDEX_DIGITS = "0123456"


def soundex(str):
    """Return the soundex value of a string argument.

    The value is always RESULT_LENGTH characters long: the upper-cased
    first character of the input followed by digits, padded on the right
    with "0" or truncated as needed.  An empty (or otherwise false)
    argument yields a string of RESULT_LENGTH zeros.

    Algorithm:
      1. Upper-case the input and remember its first character.
      2. Drop every W, H and vowel (A, E, I, O, U, Y) after the first
         character.
      3. Replace each remaining letter, the first one included, by its
         digit.  Characters that are not in the table are kept unchanged.
      4. Collapse every run of identical adjacent digits to a single digit.
      5. Put the original first character back in place of the first symbol.
      6. Pad or truncate to RESULT_LENGTH characters.

    Because vowels are dropped before step 4, two consonants with the same
    code that are separated only by vowels count as adjacent and collapse.

    The parameter is named ``str`` (shadowing the builtin) for backward
    compatibility with existing callers.
    """
    if not str:
        return "0" * RESULT_LENGTH

    word = str.upper()
    first_char = word[0]

    # Always slice off exactly the first character, whatever it is, so that
    # a leading W or H does not cause the second letter to be lost.
    kept = [first_char]
    for char in word[1:]:
        if char not in _SOUNDEX_SKIPPED:
            kept.append(char)

    # The first character is coded as well, so that a following letter with
    # the same code is recognised as a duplicate and removed.
    collapsed = []
    for char in kept:
        symbol = _SOUNDEX_CODES.get(char, char)
        if collapsed and symbol == collapsed[-1] and symbol in _SOUNDEX_DIGITS:
            # Same digit as the previous one: skip it.  Doing this one
            # symbol at a time collapses runs of any length.
            continue
        collapsed.append(symbol)

    # Swap the code of the first character for the character itself.
    value = first_char + "".join(collapsed[1:])

    # Force the result to exactly RESULT_LENGTH characters.
    value = value[:RESULT_LENGTH]
    return value + "0" * (RESULT_LENGTH - len(value))

def soundex_compare(str1, str2):
    """Return 1 if both strings have the same soundex value, 0 otherwise."""
    if soundex(str1) == soundex(str2):
        return 1
    return 0


# Set to a true value to print a few sample soundex values when the module
# is loaded.  The print calls take a single parenthesised argument so that
# they parse both as the old print statement and as the print() function.
test = 0
if test:
    print(soundex("david"))
    print(soundex("williams"))
    print(soundex("wiliams"))
    print(soundex(""))
    print(soundex("w"))
    print(soundex("aw"))
    print(soundex("wa"))

    if soundex_compare("callahan", "calaham"):
        print("works")

    

		 
    
    

