#! /bin/sh
# listwords - list the words of a text file, one word per line.
#
# Usage: listwords [-c|-h|-r] [-s file] filename
#
# The input is converted to lowercase and every run of characters other
# than a-z is treated as a word separator.  By default the words are
# printed sorted with duplicates removed.
#
#   -c       prefix every distinct word with its number of occurrences
#   -r       raw: print the words in input order (no sorting, no
#            de-duplication, no counting); stopwords are ignored
#   -s file  drop every word that appears in FILE (one lowercase word per
#            line; the file does not have to be sorted)
#   -h       print the help text
#
# Exit status: 0 on success, 1 if the input or stopword file cannot be
# read, 2 on a usage error.
#
# Everything is done with pipelines; no scratch files are created in the
# current directory.

# Print the help text on stdout.
usage() {
  cat <<'EOF'
Usage listwords [-c|-h|-r|] [-s file] filename
 default : sorted, uniq'd, not counted
      -c : as above, count words
      -r : raw (not sorted, not uniq'd, not counted)
      -h : this message
      -s name : file with stopwords
Nota Bene: everything is first converted to lowercase
EOF
}

# Write the lowercase words of file $1 to stdout, one per line, in input
# order.
extract_words() {
  # tr -sc turns each run of non-letters into a single newline.  Input that
  # starts with a non-letter would therefore yield a leading empty line;
  # sed removes it so that an empty string is never reported as a word.
  tr 'A-Z' 'a-z' < "$1" | tr -sc 'a-z' '\012' | sed '/^$/d'
}

# Copy stdin to stdout, dropping lines that are equal to a line of the
# stopword file $1.  With an empty $1 the input is passed through as is.
drop_stopwords() {
  if [ -n "$1" ]; then
    # -F -x: fixed-string, whole-line comparison; -v keeps the non-matches.
    grep -v -x -F -f "$1"
  else
    cat
  fi
}

# Parse the command line and print the requested word list.
main() {
  count=0
  raw=0
  stopwords=

  # No argument (or an empty first argument) behaves like -h.
  if [ -z "${1:-}" ]; then
    usage
    return 0
  fi

  while [ "$#" -gt 0 ]; do
    case "$1" in
      -h)
        usage
        return 0
        ;;
      -c)
        count=1
        shift
        ;;
      -r)
        raw=1
        shift
        ;;
      -s)
        if [ "$#" -lt 2 ]; then
          echo "listwords: option -s requires a file name" >&2
          return 2
        fi
        stopwords=$2
        shift 2
        ;;
      --)
        shift
        break
        ;;
      -?*)
        # Reject unknown options instead of looping forever on them.
        printf 'listwords: unknown option %s\n' "$1" >&2
        usage >&2
        return 2
        ;;
      *)
        break
        ;;
    esac
  done

  # Exactly one input file must remain after the options.
  if [ "$#" -ne 1 ]; then
    usage >&2
    return 2
  fi
  input=$1

  if [ ! -r "$input" ]; then
    printf 'listwords: cannot read %s\n' "$input" >&2
    return 1
  fi

  # Raw mode takes precedence over -c and -s.
  if [ "$raw" = 1 ]; then
    extract_words "$input"
    return
  fi

  if [ -n "$stopwords" ] && [ ! -r "$stopwords" ]; then
    printf '%s not found\n' "$stopwords" >&2
    return 1
  fi

  # Stopwords are removed before counting, so -c together with -s prints
  # a single counted list that no longer contains the stopwords.
  if [ "$count" = 1 ]; then
    extract_words "$input" | sort | drop_stopwords "$stopwords" | uniq -c
  else
    extract_words "$input" | sort | drop_stopwords "$stopwords" | uniq
  fi
}

main "$@"
   



