To: "Gordon M. Charrick"
Cc: info-ph@uxc.cso.uiuc.edu
Subject: Re: Searching using nickname also
In-Reply-To: Your message of Fri, 25 Sep 92 14:31:43 EDT.
Date: Fri, 25 Sep 92 14:00:27 EDT
Message-Id: <5988.717447641@uxc.cso.uiuc.edu>
From: "Paul Pomes, UofIllinois-CSO"

Funny you should ask that.  During our last update of the student records
I ran an extra perl script to insert certain well known nicknames.  The
script was developed after obtaining frequency counts of first names in
the existing data.  I only added nicknames for names that occurred more
than a hundred times in our 60K user database.  The nicknames for names
already in the RateAKey() routine in query.c were then coded in as well.

The distributed version of RateAKey() will require modification for large
non-English databases.

/pbp

====
#!/usr/local/bin/perl
#
# addnickname - add nickname from table
#
# usage: addnickname config [files]
#
# The field numbers of the name, nickname and no_update fields are read
# from the config file.  Records are then copied from the named files (or
# standard input when none are given) to standard output.  A record is a
# line of tab-separated "number:value" fields.  When a record has a name
# field, has neither a no_update nor a nickname field, and the second word
# of its name is a key of %nicks, a nickname field is appended to it.
# Every other record is copied through unchanged.

use strict;
use warnings;

if (@ARGV < 1) {
    print STDERR "Usage: addnickname config [files]\n";
    exit 1;
}

# First name => nickname(s).  Several nicknames for one name are separated
# by a single space inside the value.
my %nicks = (
    "alexander"   => "alex",
    "andrew"      => "andy",
    "anthony"     => "tony",
    "barbara"     => "barb",
    "benjamin"    => "ben",
    "bradley"     => "brad",
    "calvin"      => "cal",
    "carolyn"     => "carol",
    "catherine"   => "cathy",
    "christine"   => "chris",
    "christopher" => "chris",
    "daniel"      => "dan",
    "david"       => "dave",
    "deborah"     => "deb",
    "donald"      => "don",
    "douglas"     => "doug",
    "edward"      => "ed",
    "elizabeth"   => "beth",
    "eugene"      => "gene",
    "franklin"    => "frank",
    "frederick"   => "fred",
    "geoffrey"    => "geoff",
    "gerald"      => "jerry",
    "gregory"     => "greg",
    "jacqueline"  => "jackie",
    "james"       => "jim",
    "jeffery"     => "jeff",
    "jeffrey"     => "jeff",
    "jennifer"    => "jenny",
    "jonathan"    => "jon",
    "joseph"      => "joe",
    "joshua"      => "josh",
    "judith"      => "judy",
    "katherine"   => "kathy",
    "kathleen"    => "kathy",
    "kathryn"     => "kathy",
    "kenneth"     => "ken",
    "kimberley"   => "kim",
    "kimberly"    => "kim",
    "laurence"    => "larry",
    "lawrence"    => "larry",
    "leonard"     => "len",
    "matthew"     => "matt",
    "michael"     => "mike",
    "nicholas"    => "nick",
    "pamela"      => "pam",
    "patrick"     => "pat",
    "philip"      => "phil",
    "phillip"     => "phil",
    "randall"     => "randy",
    "raymond"     => "ray",
    "richard"     => "rich rick",
    "robert"      => "rob bob",
    "ronald"      => "ron",
    "russel"      => "russ",
    "russell"     => "russ",
    "samuel"      => "sam",
    "sandra"      => "sandy",
    "stanley"     => "stan",
    "stephen"     => "steve",
    "steven"      => "steve",
    "stuart"      => "stu",
    "thomas"      => "tom",
    "timothy"     => "tim",
    "victoria"    => "vicky",
    "walter"      => "walt",
    "william"     => "will bill",
);

# find the numbers of the name, noupdate and nickname fields
my ($namenum, $nonum, $nicknum);
my $config = shift;
open(my $cfg, '<', $config) or die "$config: $!\n";
while (my $entry = <$cfg>) {
    # The field number is whatever precedes the first ':' on the line.
    my ($num) = split /:/, $entry;
    if    ($entry =~ /:no_update:/) { $nonum   = $num; }
    elsif ($entry =~ /:name:/)      { $namenum = $num; }
    elsif ($entry =~ /:nickname:/)  { $nicknum = $num; }
}
close($cfg);

# Without a name field nothing can be looked up, and without a nickname
# field there is no valid field number to write, so stop rather than emit
# bogus data.  A missing no_update field is fine: nothing is locked then.
die "$config: no name field found\n"     unless defined $namenum;
die "$config: no nickname field found\n" unless defined $nicknum;

# A field starts either at the beginning of the record or after a tab.
# Records that already carry a nickname or a no_update field are left alone.
my $skip_alt = join '|', map { quotemeta } grep { defined } ($nonum, $nicknum);
my $skip_re  = qr/(?:^|\t)(?:$skip_alt):/;

# The name field's second space-separated word is captured as the first
# name (presumably names are stored "last first ..." -- confirm against the
# data).  Words may not contain tabs or newlines, so the match cannot run
# on into the next field or swallow the line terminator.
my $first_name_re = qr/(?:^|\t)\Q$namenum\E:[^ \t\n]+ ([^ \t\n]*)/;

while (my $record = <>) {
    my $nickname;
    if ($record !~ $skip_re && $record =~ $first_name_re) {
        $nickname = $nicks{$1};
    }

    if (defined $nickname) {
        # chomp (not chop) so a final record lacking a newline stays intact.
        chomp $record;
        print "$record\t$nicknum:$nickname\n";
    }
    else {
        print $record;
    }
}

.