#!/bin/sh
# create and print/mail mass mail letters to many recipients
# Version 0.8.1 dated 13 September 1999  (this is still alpha!)
# copyleft: this shell script is freeware with no warranty at all.
# author:   "Thomas Spahni"  <thomas@spahni.ch>
#
# this script will eventually be integrated into dbtool
#
# command line invocation:
# massmail [-v] [-Pprinter] template[.tex] [database table 'sql_query']
#
# Overview:
# ========
# The massmail script prints or mails multiple copies of a document
# replacing variables in a template file with data from
# a MySQL database.
#
# It is expected that the template is a TeX source. The source may
# contain variables of the format %!variablename!%
# where 'variablename' is a field name from a MySQL database table.
# Although the script performs some TeX-specific conversions it could
# be adapted to other text processors with minor effort.
# It should be possible to use msql instead of mysql but this
# has not been tested and needs some filter to convert the
# msql terminal monitor's output to tab separated rows.
#
# The template file may contain additional lines of the following format:
# (these lines start with a percent %-character)
# % Database = mydatabase
# % Table    = adresstable
# % Where    = name like 'C%' order by town
# % Printer  = -Phplaser
# % Printing = yes (or no)
# % Mailing  = yes (or no)
# % E-Mail   = email-addr
# % Subject  = Re: sending personalized e-mails
#
# Of course TeX will ignore these lines as comments but
# massmail extracts data from them.
# The line beginning with "% Where    = " is the where-clause of
# an SQL query, which will be completed to something like:
# 'SELECT var1,var2,var3... FROM adresstable WHERE name LIKE .... ;'
# (if you omit the semicolon at the end, it will be added by the script).
# Thus it is possible to define in the template of the document
# who will get a copy of it.
#
# The argument to E-Mail is the fieldname of the database table
# containing an e-mail address.
#
# There is a limited capability to replace local variables
# with constant character strings depending on the contents of a
# field in the database. The template may contain lines with
# statements of the form:
#
#          1         2         3        4
# % if salutation == Mr then hello = Dear Sir
# % if salutation == Mrs then hello = Madame
# % if salutation == DEFAULT then hello = ''
# % if salutation == '' then hello = Hi!
#
#  1    is a field name in the sql database
#  2    is a constant string to be compared with the field content
#  3    is a local variable to be used in the template file
#  4    is a constant string to be assigned to local variable 3
#
# The template may contain the local variable !!hello!!
# at one or more places which will be replaced by "Dear Sir"
# or by "Madame" as appropriate. Two single quotes are used
# to indicate an empty character string. The DEFAULT case
# applies where no other rule matches.
#
# Of course the database table must contain a field named 'salutation'.
# The substitute constant string is scanned for TeX special characters,
# which are masked automatically; the _underscore_ and the |pipe|
# characters, however, may NOT be used. Allowable TeX special characters
# are: '& $ % # \' (as in '$-Trust, 30%-Savings#1 & Partners')
#
# Be aware that %!variable!% will be replaced with data from
# the respective field name in the database table while
# !!variable!! is a local variable to be replaced by some
# constant string defined in a statement. No checks are made
# to ensure that the statement is reasonable or that
# !!variable!! is ever assigned. The comparison is for a perfect
# match between the constant given in the statement and the
# contents of the data field, but the data field is truncated to
# the length of the constant. For example, the statement
# % if zipcode == 9 then city = hometown
# will match zipcodes like '9000' or '9999' (everything starting
# with 9).
#
# Configuration:
# The behaviour of the script is controlled by:
#    1. Command line arguments
#    2. Configuration file massmail.rc
#    3. Assignments in the template file
#    4. Defaults in this shell script
#
# Command line arguments:
# The minimal command line argument is the name of a template file.
# Other options can be given:
#
# massmail [-v] [-Pprinter] templatefile [database table 'sql-query']
# 
# -v          toggle verbose mode on/off
#             overrides the setting from the config file;
#             defaults to "verbose"
#
# -Pprinter   specifies the printer to use.
#             A printer given on the command line takes precedence
#             over a printer given in the template file and the
#             template file overrules the defaults from the
#             configuration file.
#
#             The following may be given in the exact order
#             after the filename. Normally this information
#             is part of the template file.
#
# <database>  name of the database to use
#
# <table>     name of the table to use
#
# 'sql-query' SQL-Query to retrieve data. Must be quoted!
#
#
# Bugs:
# Probably many. All data must be in one single table.
# The script is dead slow when the number of variables and
# program lines in the template grows (but still faster
# than most printers).
# ----------------------------------------------------------------

#
# ------------------------------------------------------------------
# User configurable defaults
# ------------------------------------------------------------------
# These are the built-in fallback values of the script.
#
# Account used to access the database.  This may be the current
# user's login name (USERNAME=$USER) or a dedicated account that has
# access to the database; preferably configure it in $HOME/.massmail.rc
USERNAME="root"

# Password option for $USERNAME.  Keep it empty here (except for
# testing) and set PW in a file named .massmail.rc which is not
# readable for anyone else, for example:
#    PW='--password=mYpaSSword'
PW=''

# name of the configuration file
CONFIG="massmail.rc"

# printing
PRINTING="no"
PRINTER=""
DVIKEEP="no"
WAITTIME=""          # how long it takes to print one page
TEXPROG="tex"        # could be 'latex'

# mailing
MAILER="mail"        # alternative: mutt

# database client programs
DBI="mysql"
DBISHOW="mysqlshow"  # for msql: relshow

# miscellaneous
VERBOSE="yes"
RMK='%'              # the letter to start a remark
#
# --- stop --- end of user configuration
#
#
# Read the optional configuration files.  Assignments made in them replace
# the values set so far; $HOME/.massmail.rc is read last and therefore has
# the final word.
#
# load_rc FILE
#   Source FILE into the running shell if it is readable; do nothing (and
#   succeed) if it is not, or if FILE is empty.
#   A name without a slash is turned into ./NAME first: the '.' builtin
#   looks such names up in $PATH, and a shell in POSIX mode (bash started
#   as "sh") does not fall back to the current directory, so the file that
#   'test -r' found would not be the file that gets sourced.
#   Note: the file is sourced from inside this function, so "$1" seen by
#   the file is its own path, not the script's first argument.
load_rc() {
   test -n "$1" || return 0
   case "$1" in
      */*) ;;
      *)   set -- "./$1" ;;
   esac
   if test -r "$1" ; then
      . "$1"
   fi
}

load_rc "$CONFIG"
load_rc "$HOME/.massmail.rc"
#
# An optional leading -v on the command line flips the verbose setting,
# whatever value it has at this point: "yes" becomes "no", anything
# else becomes "yes".  The flag is then removed from the argument list.
case "$1" in
   -v)
      case "$VERBOSE" in
         yes) VERBOSE=no ;;
         *)   VERBOSE=yes ;;
      esac
      shift 1
      ;;
esac

#
# A printer may be given on the command line as -Pname.
#
# is_printer_option ARG
#   Succeed when ARG starts with "-P".  The argument is matched with a
#   shell pattern instead of being piped through echo and sed, so it is
#   never word-split, glob-expanded or mistaken for an echo option.
is_printer_option() {
   case "$1" in
      -P*) return 0 ;;
      *)   return 1 ;;
   esac
}

if is_printer_option "$1" ; then
   # the whole word, including the -P, is kept in PRI
   PRI="$1"
   shift 1
fi

#
# strip_tex_suffix NAME
#   Print NAME without a trailing ".tex".  Only the literal four
#   characters ".tex" are removed; a name that merely ends in "tex"
#   (such as "latex") is printed unchanged.
strip_tex_suffix() {
   printf '%s\n' "${1%.tex}"
}

# candidate template name: the next argument without its .tex suffix
VTF=$(strip_tex_suffix "$1")

#
# Verbose (interactive) runs start with a banner and a short reminder of
# the template syntax.
#
if test "$VERBOSE" = "yes" ; then
   cat <<'BANNER'
                      ***************************
                      **                       **
                      **       mass mail       **
                      **                       **
                      ***************************

Create multiple letters from a template.
Template file is a plain TeX or LaTeX source
containing variables of the form  %!variable!%
where each variable is a column name from your SQL-Database.

The template may contain lines of the following format
to select data from the database:
% Database = mydatabase
% Table    = adresstable
% Where    = name like 'c%'
% Printer  = -PHPlaser
Defaults are read from the file massmail.rc

BANNER
fi

#
# Decide which template to use:
#  - $1 names a readable template (with or without .tex): take it;
#  - otherwise a verbose run asks for a name (empty answer: give up);
#  - otherwise there is nothing to work on.
#
if test -r "${VTF}.tex" ; then
   SFNAME="$1"
   shift 1
elif test "$VERBOSE" = "yes" ; then
   echo -n "enter name of template letter (TeX-Source): "
   read SFNAME
   test -n "$SFNAME" || exit 1
else
   echo "massmail: missing file argument" >&2
   echo "(usage: massmail [-v -Pxx] file[.tex] mydb dbtable \"query\")" >&2
   exit 1
fi
#

# Derive the working file names from the chosen template name:
#   SFNAME  template name without the .tex suffix
#   FNAME   template file
#   TNAME   prefix for the files this run creates ($$ is the shell's PID)
# Only a literal ".tex" suffix is removed; a sed pattern with an
# unescaped dot would also cut names that merely end in "tex"
# ("latex" -> "la").
SFNAME=${SFNAME%.tex}
FNAME=$SFNAME.tex
TNAME=$SFNAME$$

if ! test -r "$FNAME" ; then
   echo "Input file '$FNAME' not found" >&2
   exit 1
fi

# From here on an input file exists.  The template is searched for
# settings; command line options can still replace them.

#
# Printer: a '% Printer = ...' line in the template replaces the value
# PRINTER has so far.  The line may assign an empty value
# ('% Printer = '), which is why the presence of the line is tested,
# not the value.
TFPRI=$(sed -n '/^% *Printer *=/p' <$FNAME)
test -z "$TFPRI" || PRINTER=$(echo "$TFPRI" | sed 's/^% *Printer *= *//')

#
# A printer from the command line (kept in PRI) beats both the default
# and the template.
test -z "$PRI" || PRINTER="$PRI"

#
# A '% Printing = ...' line in the template decides whether letters are
# printed: the value "yes" switches printing on, any other value
# switches it off.  Without such a line PRINTING keeps its value.
# (this is nice for testing; don't have to edit massmail.rc)
PTING=$(sed -n '/^% *Printing *=/p' <$FNAME)
if test -n "$PTING" ; then
   PTANS=$(echo "$PTING" | sed 's/^% *Printing *= *//')
   case "$PTANS" in
      yes) PRINTING=yes ;;
      *)   PRINTING=no ;;
   esac
fi

#
# Mail related settings from remark lines of the template (each of them
# may be missing, which leaves the variable empty):
#   '<RMK> E-Mail  = ...'  -> EMAIL
#   '<RMK> Subject = ...'  -> ESUBJ
#   '<RMK> Mailing = ...'  -> MLING
EMAIL=$(sed -n "s/^${RMK} *E-Mail *= *//p" <$FNAME)
ESUBJ=$(sed -n "s/^${RMK} *Subject *= *//p" <$FNAME)
MLING=$(sed -n "s/^${RMK} *Mailing *= *//p" <$FNAME)

# EM is EMAIL preceded by a newline; it is only set when EMAIL is not empty
case "$EMAIL" in
   "") ;;
   *)  EM=$(echo -e "\n$EMAIL") ;;
esac

#
# Database to use
# Look for '% Database = ...' lines in the template file first;
# if there is more than one database line then the last one rules.

DBASE="$(sed -e '/^% *Database *= */!d' -e 's/^% *Database *= *//' <"$FNAME")"
if test -n "$DBASE" ; then
   DBASE=$(echo "$DBASE" | tail -n 1)
fi

#
# A database given on the command line overrides the one from the file.
# The next argument is assumed to be the database name and is probed with
# $DBISHOW; it is used (and consumed) if the probe succeeds.  If the probe
# fails the argument stays in place and DBASE is cleared, so the code
# below asks for a name or gives up.
# Only stdout of the probe is discarded; the client's own error message
# stays on stderr.  $PW is left unquoted on purpose: when it is empty it
# has to disappear from the command line.
if test -n "$1" ; then
   if $DBISHOW "--user=$USERNAME" $PW "$1" >/dev/null ; then
      DBASE="$1"
      shift 1
   else
      DBASE=""
   fi
fi

# if there is still no dbase and we are in verbose mode then list the
# databases and ask, otherwise complain and quit
if test -z "$DBASE" ; then
   if test "$VERBOSE" = "yes" ; then
      $DBISHOW "--user=$USERNAME" $PW
      echo -n "enter name of SQL database to use: "
      read DBASE
      if test -z "$DBASE" ; then
         exit 1
      fi
      echo
   else
      echo "massmail: missing database" >&2
      echo "(usage: massmail file[.tex] mydb dbtable \"query\")" >&2
      exit 1
   fi
fi

# final check that the database is usable, i.e. exists and is accessible;
# the complaint goes to stderr like the other error messages
if ! $DBISHOW "--user=$USERNAME" $PW "$DBASE" >/dev/null ; then
   echo "massmail: Database $DBASE not found" >&2
   exit 1
fi

if test "$VERBOSE" = "yes" ; then
   echo "using database '$DBASE'"
fi

#
# Name of the table to use:
# first take it from a '% Table = ...' line of the template file
DBTBL=$(sed -e "/^% *Table *= */!d" -e "s/^% *Table *= *//" <"$FNAME")

#
# A table given on the command line overrides the one from the file.
# The next argument is probed with $DBISHOW; it is used (and consumed) if
# the probe succeeds.  If it fails the argument stays in place and DBTBL
# is cleared, so the code below asks for a name or gives up.
# Only stdout of the probe is discarded; the client's own error message
# stays on stderr.  $PW is left unquoted on purpose: when it is empty it
# has to disappear from the command line.
if test -n "$1" ; then
   if $DBISHOW "--user=$USERNAME" $PW "$DBASE" "$1" >/dev/null ; then
      DBTBL=$1
      shift 1
   else
      DBTBL=""
   fi
fi

# if there is still no table and we are in verbose mode then list the
# tables and ask, otherwise complain and quit
if test -z "$DBTBL" ; then
   if test "$VERBOSE" = "yes" ; then
      $DBISHOW "--user=$USERNAME" $PW "$DBASE"
      echo -n "enter name of table to use: "
      read DBTBL
      if test -z "$DBTBL" ; then
         exit 1
      fi
      echo
   else
      echo "massmail: missing table name" >&2
      echo "(usage: massmail file[.tex] mydb dbtable \"query\")" >&2
      exit 1
   fi
fi

# final check that the table is usable, i.e. exists and is accessible;
# the complaint goes to stderr like the other error messages
if ! $DBISHOW "--user=$USERNAME" $PW "$DBASE" "$DBTBL" >/dev/null ; then
   echo "massmail: Table $DBTBL in database $DBASE not found" >&2
   exit 1
fi

if test "$VERBOSE" = "yes" ; then
   echo "using data from table '$DBTBL'"
fi

#
# Row selection.  A '% Where = ...' line of the template supplies the
# WHERE clause (a trailing semicolon is cut off); a clause given as the
# next command line argument replaces it.
#
SCLAUSE=$(sed -e "/^% *Where *= */!d" \
              -e "s/^% *Where *= *//" \
              -e "s/; *$//" <$FNAME )
test -z "$1" || SCLAUSE="$1"

# no clause at all and nobody to ask: give up
if test -z "$SCLAUSE" && test "$VERBOSE" != "yes" ; then
   echo "massmail: missing SQL query to select data" >&2
   echo "(usage: massmail file[.tex] mydb dbtable \"query\")" >&2
   exit 1
fi

# no clause, but a verbose run: show the column names and ask
if test -z "$SCLAUSE" ; then
   echo "Field names are:"
   # first output line is dropped; only the first tab separated column
   # of the remaining lines is shown
   echo "show columns from $DBTBL;" | $DBI --user=$USERNAME $PW $DBASE \
     | sed -e '1 d' | cut -f 1 | fold -s -w 72
   echo -n "--> enter condition to select data sets: "
   read CONDITION
   test -n "$CONDITION" || exit 1

   # Convenience rewriting of the answer:
   #  - ' = ' is turned into ' like '
   #  - the word following ' like ' is wrapped in single quotes
   #  - doubled single quotes (the word was quoted already) are
   #    reduced to one
   CONDITION=$(echo "$CONDITION" | sed -e "s/ = / like /g" \
     -e "s/\\( like \\)\\([^ ][^ ]*\\)/\\1'\\2'/g" -e "s/''/'/g")
fi


# Programmable Variables
# This part deals with programmable contents of the template letter
# Syntax is:
#'% if anrede == Herrn then ansprech = Sehr geehrter Herr'
#
# template_vars FILE
#   Print the NAME of every %!NAME!% placeholder found in FILE, one per
#   line, in order of appearance (duplicates included).  Each placeholder
#   is matched on its own, so other text on the line -- including further
#   percent signs such as a TeX '\%' -- cannot leak into a name.
template_vars() {
   awk '{
      rest = $0
      while (match(rest, /%![^!%][^!%]*!%/)) {
         print substr(rest, RSTART + 2, RLENGTH - 4)
         rest = substr(rest, RSTART + RLENGTH)
      }
   }' <"$1"
}

# select_list LIST...
#   Each argument is a newline separated list of field names.  Print the
#   distinct names sorted, one per line, every line but the last ending
#   in a comma (ready to be placed between "select" and "from").
#   Empty lines are dropped first; they would otherwise become a lone
#   comma and make the SQL statement invalid.
select_list() {
   printf '%s\n' "$@" \
      | sed -e '/^$/d' -e 's/$/,/' \
      | sort -u \
      | sed -e '$ s/,$//'
}

# Get the program statements from the template: lines containing
# '%', optional blanks and 'if'.  Every run of blanks/tabs is replaced
# by one underscore and the leading '% if ' is deleted.
TAB=$(printf '\t')
PROG=$(sed -e "/%[ ${TAB}]*if[ ${TAB}]/!d" \
           -e "s/[ ${TAB}][ ${TAB}]*/_/g" \
           -e 's/^%_*if_//' <"$FNAME")

# all %!variables!% used in the template text
VVARS=$(template_vars "$FNAME")

# the database fields tested in the if-statements (text before ' == ')
IFVRS=$(echo "$PROG" | sed -e 's/_==_.*$//')

# everything that has to be selected: template variables, fields used in
# if-statements and the e-mail field ($EM, if set)
VARS=$(select_list "$VVARS" "$IFVRS" "$EM")

# column_exists NAME
#   Succeed if a query selecting NAME from table $DBTBL is accepted by the
#   database client ('where 1=0' keeps the query from returning rows).
#   All output of the client is discarded.  $PW is left unquoted on
#   purpose: when it is empty it has to disappear from the command line.
column_exists() {
   echo "select $1 from $DBTBL where 1=0;" \
      | $DBI "--user=$USERNAME" $PW "$DBASE" >/dev/null 2>&1
}

# check_variables
#   Probe every name listed in $VARS (the trailing comma of each entry is
#   ignored).  Unknown names are reported on stderr.  ERRORFLAG is set to
#   "true" if at least one name is unknown, else to "false"; the return
#   status is 0 only if all names are known.
check_variables() {
   ERRORFLAG=false
   for VV in $VARS ; do
      V=${VV%,}
      if ! column_exists "$V" ; then
         printf "\nunknown variable '%s' in table '%s'" "$V" "$DBTBL" >&2
         ERRORFLAG=true
      fi
   done
   test "$ERRORFLAG" = false
}

if test "$VERBOSE" = "yes" ; then
   printf '%s' "checking variables ... "
fi

if ! check_variables ; then
   # finish the line of the last message and list everything that was
   # checked ($VARS is unquoted so the names appear on a single line)
   { echo "" ; echo $VARS ; } >&2
   exit 1
fi
if test "$VERBOSE" = "yes" ; then
   echo "done; all ok"
fi


# Assemble the query.  The WHERE clause is $SCLAUSE if that is not
# empty, otherwise $CONDITION.
QUERY="select $VARS from $DBTBL where ${SCLAUSE:-$CONDITION} ; "

# verbose runs display the query, folded at 72 columns
test "$VERBOSE" != "yes" || echo $QUERY | fold -s -w 72

# Run the query and collect the client's output in $TNAME.tmp.
# Error messages of the client are deliberately NOT written into that
# file: the following steps treat its first line as the list of field
# names, so a captured error message would be processed as data.  A
# failing query stops the run here instead.
# $PW is left unquoted on purpose: when it is empty it has to disappear
# from the command line.
if ! echo "$QUERY" | $DBI "--user=$USERNAME" $PW "$DBASE" >"$TNAME.tmp" ; then
   echo "massmail: SQL query failed" >&2
   rm -f "$TNAME.tmp"
   exit 1
fi

# count_lines FILE
#   Print the number of lines in FILE as a bare integer.  wc reads the
#   file from stdin so that it prints no file name, and any padding
#   blanks are removed; the result is safe to use in $(( )).
count_lines() {
   wc -l <"$1" | tr -d ' \011'
}

# get the list of field names from the first line:
# separate fields with linefeeds and add line numbers,
# giving one entry '<number>_<fieldname>' per line
FIELDS=$(head -n 1 "$TNAME.tmp" | tr '\011' '\012' \
         | cat -n - | sed -e "s/^ *//" | tr '\011' '_')

# the data rows are everything but the first line
sed -e "1 d" <"$TNAME.tmp" >"$TNAME.dta"
rm -f "$TNAME.tmp"

# let's see how many rows have been retrieved
LASTLINE=$(count_lines "$TNAME.dta")
if test "$VERBOSE" = "yes" ; then
   echo "... processing $((LASTLINE)) data sets ... "
fi
# initialize the counter
CTR=1

# output format for the 'time' keyword
TIMEFORMAT='%3R'
# Default IFS = <space><tab><newline>
# From here on <tab> is no longer an Internal Field Separator
# for the read command; otherwise it would skip leading tabs
# when reading data lines and these tabs mark empty fields
# which must be maintained.
# IFS becomes exactly <space><newline>.  It is built with printf because
# a backslash-newline inside single quotes is NOT a line continuation:
# the backslash would end up in IFS as a third separator.  The trailing
# X protects the newline from being stripped by the command substitution.
IFS=$(printf ' \nX')
IFS=${IFS%X}
# field_column NAME
#   Print the column number of field NAME according to $FIELDS (entries
#   of the form '<number>_<fieldname>').  The whole field name has to
#   match, so 'name' does not also select 'firstname'.
field_column() {
   echo "$FIELDS" | sed -e "/^[0-9]*_$1\$/!d" -e "s/_.*//"
}

# Main loop: one pass per data row.
# while reads from standard input
# file $TNAME.dta is fed as input to the 'done' command
time while read DTALINE
   do
   # we want one item per line
   DTATBL=$(echo "$DTALINE" | tr '\011' '\012')
   #
   # make a local copy of the letter template
   cp $FNAME $FNAME.loc
   #
   # prepare a letter for this data row
   # 1st step:
   # adapt the template according to any program lines in the original
   # (each PL has the form field_==_value_then_variable_=_text)
   #
   for PL in $PROG ; do
      # name of the variable in the database
      DBVAR=$(echo $PL | sed -e 's/_==_.*$//')
      # get value to compare with
      CMPVL=$(echo $PL | sed -e 's/^.*_==_//' -e 's/_*then_.*//')
         # '' stands for an empty field; EMPTY is the internal marker
         if test "$CMPVL" = "''" ; then CMPVL="EMPTY" ; fi
         # one dot per character of CMPVL: a regex of the same length
         CMPLEN=$(echo "$CMPVL" | sed -e "s/././g")
      # name of the variable in the template file
      TPVAR=$(echo $PL | sed -e 's/^.*_*then_//' -e 's/_=_.*//')
      # constant to be assigned if the condition is met
      # (underscores are turned back into blanks)
      TPVAL=$(echo $PL | sed -e 's/^.*_=_//' -e 's/_/ /g')
         if test "$TPVAL" = "''" ; then TPVAL="" ; fi
      # Mask special characters in the constant for TeX; the backslashes
      # are doubled because the value is used as sed replacement text below
      TPVAL=$(echo "$TPVAL" | sed \
         -e "s/#/\\\\\\\\#/g" \
         -e "s/%/\\\\\\\\%/g" \
         -e "s/\\$/\\\\\\\\$/g" \
         -e "s/&/\\\\\\\\\\\\&/g")
      #
      #echo $DBVAR $CMPVL $TPVAR $TPVAL
      #
      # now: get the value for the database variable $DBVAR:
      # result is returned in $DBVARVAL and truncated to len($CMPVL)
      FC=$(field_column "$DBVAR")
      DBVARVAL=$(echo "$DTATBL" | sed -e "$FC !d" \
                  -e "s/\\($CMPLEN\\)\\(.*\\)/\\1/")
      if test -z "$DBVARVAL" ; then DBVARVAL="EMPTY" ; fi
      #
      # replace !!variable!! if the field matches, or if this is the
      # DEFAULT statement
      if test "$DBVARVAL" = "$CMPVL" ; then
         sed -e "s|!!$TPVAR!!|$TPVAL|g" <$FNAME.loc >$FNAME.tmp
         rm $FNAME.loc ; mv $FNAME.tmp $FNAME.loc
      else
         if test "$CMPVL" = DEFAULT ; then
            sed -e "s|!!$TPVAR!!|$TPVAL|g" <$FNAME.loc >$FNAME.tmp
            rm $FNAME.loc ; mv $FNAME.tmp $FNAME.loc
         fi
      fi
   done
   #
   # 2nd: Substitution of ordinary variables:
   #
   # Mask special characters in the datalines for TeX.
   # A backslash is first wrapped in pipe characters; the last expression
   # turns every pipe character into a dollar sign.
   DTATBL=$(echo "$DTATBL" | sed \
                 -e "s/\\\\/|\\\\\\\\backslash|/g" \
                 -e "s/#/\\\\\\\\#/g" \
                 -e "s/%/\\\\\\\\%/g" \
                 -e "s/\\$/\\\\\\\\$/g" \
                 -e "s/&/\\\\\\\\\\\\&/g" \
                 -e "s/|/$/g")
   #
   for FN in $FIELDS ; do
      # FC = column number, FF = field name, FD = data of that column
      # (slashes escaped, FD is used inside a s/// command)
      FC=$(echo $FN | sed "s/_.*//")
      FF=$(echo $FN | sed "s/[0-9]*_//")
      FD=$(echo "$DTATBL" | sed -e "$FC !d" -e "s|/|\\\\/|g")
      sed "s/%!$FF!%/$FD/g" <$FNAME.loc >$FNAME.tmp
      rm $FNAME.loc ; mv $FNAME.tmp $FNAME.loc
      # remember the recipient's address
      if test "$FF" = "$EMAIL" ; then EADDR="$FD" ; fi
   done
   #
   # four digit, zero padded counter as file name suffix
   FSUFF=$(echo "0000$((CTR))" \
      | sed "s/\\(0*\\)\\([0-9][0-9][0-9][0-9]\\)/\\2/")
   # write the result to a unique file and remove remarks
   sed "/^${RMK}.*/d" >$TNAME-$FSUFF.tex <$FNAME.loc
   #
   if test "$PRINTING" = yes ; then
      $TEXPROG $TNAME-$FSUFF.tex >/dev/null 2>&1
      if test -r $TNAME-$FSUFF.dvi ; then
         dvips -q -o $TNAME-$FSUFF.ps $TNAME-$FSUFF.dvi
         lpr $PRINTER $TNAME-$FSUFF.ps
         rm -f $TNAME-$FSUFF.log
      else
         echo "Problems with your TeX syntax; please check $TNAME-$FSUFF.log"
         exit 1
      fi
      if test -n "$WAITTIME" ; then sleep $WAITTIME ; fi
      rm -f $TNAME-$FSUFF.ps
      rm -f $TNAME-$FSUFF.dvi
   fi
   # show what has been done
   if test "$VERBOSE" = "yes" ; then
      echo -n "$(echo "${FSUFF}    ")"
   fi
   CTR=$((CTR+1))
   #
   if test "$MLING" = yes ; then
      $MAILER -s "$ESUBJ" "$EADDR" < $TNAME-$FSUFF.tex
   fi
   # The source of a printed letter is removed only now: the mail step
   # above still has to read it when printing and mailing are both on.
   if test "$PRINTING" = yes ; then
      rm -f $TNAME-$FSUFF.tex
   fi
done <$TNAME.dta


# Remove the working files of this run; files that do not exist are
# silently skipped by rm -f.
for LEFTOVER in $TNAME-*.tmp $TNAME.dta $TNAME-$FSUFF.log $FNAME.loc ; do
   rm -f "$LEFTOVER"
done
#
test "$VERBOSE" != "yes" || echo -e "\ndone"
exit 0
