#!/bin/sh
# bu  -    BackUp:  Backup files or directories to another file system 
#
# Requires: GNU cp
#
# Copyright (c) 1999 2000 2001  Vincent Stemen
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $Id: bu,v 2.8 2001/09/30 22:26:12 vince Exp $


CONFIGURATION_FILE=~/.burc      # Default configuration file

# Default settings.  These are overridden by the settings in the
# configuration file.
#
# LOG is named after the current date (month, day, two digit year).
# LOG_ACCESS_PERMS is the mode handed to chmod for the log files.
# GROUP_SIZE and DELAY throttle directory backups (see backup()).

BACKUP_DIR=/backup
LOGDIR=/var/backups/bu
EXCLUDE_FILE=$LOGDIR/Exclude
INCLUDE_FILE=$LOGDIR/Include
LOG=$LOGDIR/log.$(date +%m%d%y)
LOG_ACCESS_PERMS=640
INCREMENTAL=on
GROUP_SIZE=1
DELAY=0

#################
# Internal working files.  They all live in a per-process directory whose
# name ends in the process id of this shell; bye() derives a process id
# from that name when it removes stale temp directories.

TMPDIR=/var/tmp
TMP=$TMPDIR/bu.$$        # Directory for bu's temporary files
FILE_LIST=$TMP/file-list # The list of files to back up with wild cards expanded
FILTER=$TMP/filter       # The filter that gets generated from the Exclude list
SPECIFIED_FILES=$TMP/specified-files      # The original specified file list
FILTERED_LIST=$TMP/filtered-list # $FILE_LIST after running it through $FILTER

# Run time state.  The capitalized-first-letter names are set from the
# command line (parse_cmd_line) or derived in configure().

Backup_dir=       # Backup dir specified on the command line with -d
Include_file=     # Include file specified on the command line with -f
Options=          # Additional options to pass to cp such as -v and -u
                  # NOTE(review): not referenced by any code visible in
                  # this file; backup() builds its own option string.
Incremental=      # Flag - Toggles incremental backups on or off based on
                  #        the value of $INCREMENTAL.
Verbose=1         # Flag - Verbose mode
Inc_opt='-u'      # To pass to cp; cleared by configure() when incremental
                  # backups are off
Help=             # Flag - Indicates the help switch was used
Interrupted=      # Flag - Indicates bu was interrupted by a signal
Mail_addr=        # The email address to mail the log to
Progname=$0
Cp=cp             # The copy command; the main routine may switch it to gcp


# usage()
# Usage:  usage
# Prints the help text, including the current defaults, on stdout and
# then exits the script.  It never returns to the caller.
#
# The version number is taken from the RCS Id keyword embedded in the
# script.  ident(1) is tried first; when it is not installed (or finds
# nothing) the keyword line is read straight from the script instead so
# the version is not left blank.

usage()
{
  local version
  local on_off current

  # The bracket around the colon keeps this line itself from looking like
  # an RCS keyword to ident.
  version="$(ident "$Progname" 2> /dev/null \
               | awk '/\$Id[:]/ {print $3; exit}')"
  if [ -z "$version" ]; then
    version="$(awk '/\$Id[:] / {print $4; exit}' "$Progname" 2> /dev/null)"
  fi

  # -i toggles the configured default, so describe what it would do now.
  if [ -n "$Incremental" ]
  then
    on_off=off
    current=on
  else
    on_off=on
    current=off
  fi

  cat << eof

bu     Version $version   Pronounced B U
       Back Up files and directories to another file system.   
  
Usage: bu [-i] [-q] [-d backup_dir] [-c config_file] [-f include_file]
          [-m addr] [file|dir file|dir ...]

       For a normal backup of the files listed in $INCLUDE_FILE,
       using the default settings, just type "bu" with no arguments.

       -c config_file
               Specify an alternate configuration file.  
               (Currently $CONFIGURATION_FILE)

       -d backup_dir
               Backup the files to directory, backup_dir.
               (Currently $BACKUP_DIR.)

       -f include_file   (Currently $INCLUDE_FILE)
               Specify an alternate file containing the list of
               files/directories to be backed up. If -f is not used
               and no files or directories are specified, then
               the list of files in the default include file are backed up.

       -i      Turn $on_off incremental backups (currently $current).
               The default can be set in $CONFIGURATION_FILE.  

       -m addr
               Mail the log to the specified email address when the backup
               is finished.

       -q      Quiet mode.  Verbose mode is on by default.  This turns it off.

       -? | -h | -help         
               This help

Notes: 
       o It does not traverse mounted file systems so that you can backup 
         file systems like / without having to umount file systems you
         don't want included, such as NFS mounts.  Of course, you also
         wouldn't want it to traverse the mount point of your backup
         directory.

       o It Can be configured through the file ~/.burc which is created
         automatically the first time bu is run, even with just the help
         switch.

       o Read the documentation on the variables in .burc for configuration
         and more details on how the tool behaves.  

eof
exit
}


# configure()
# Usage:  configure
# Run the configuration file if it exists, otherwise create it with the
# default values for normal operation and then run it.
#
# Afterwards:
#   - Incremental is set to 1 or emptied from $INCREMENTAL (empty, "no"
#     and "off" mean off) and flipped when -i was given; Inc_opt is
#     cleared when incremental backups are off.
#   - BACKUP_DIR and INCLUDE_FILE are overridden by -d / -f.
#   - Unless the help switch was used, LOG is changed to a log file name
#     that did not exist yet and that file is created.
#
# All paths are quoted so that a configuration file or log directory
# whose name contains white space works.

configure()
{
  local conf="$CONFIGURATION_FILE"

  # "." searches $PATH for names without a slash, so anchor a bare file
  # name (e.g. "-c myrc") to the current directory.
  case "$conf" in
    */*) ;;
    *)   conf="./$conf" ;;
  esac

  if [ -x "$conf" ] ;then
    . "$conf"
  elif [ -f "$conf" ] ;then
    chmod u+x "$conf"
    . "$conf"
  else
    cat << eof > "$conf"
## bu configuration file ##

# BACKUP_DIR
# Default destination directory for backing up files.  This should commonly
# be either an NFS mount point or a symbolic link to an NFS or other file
# system that houses your backups.

BACKUP_DIR=$BACKUP_DIR


# INCREMENTAL  - Flag
# Turns on or off incremental backups by default.  When incremental is
# on, it will not copy a file that exists in the backup directory with the
# same or newer modification time.  Either un-set it or set it to "no"
# or "off" to turn off incremental backups.

INCREMENTAL=on


# LOGDIR
# Location of the log files and configuration files (other than this rc file)
# This directory will automatically be created if it does not already
# exist.

LOGDIR=$LOGDIR


# LOG
# Bu log file name.

LOG=\$LOGDIR/log.\$(date +%m%d%y)


# LOG_ACCESS_PERMS
# File permissions for the log files (man chmod).  The logs usually should not
# be world readable.  Otherwise, users can look at the backup logs to see what
# files are in directories they have no 'x' access to.

LOG_ACCESS_PERMS=$LOG_ACCESS_PERMS


# INCLUDE_FILE
# The name of the file containing the default list of files and/or directories
# to back up.  This filename can be overridden on the command line with the -f
# switch.  If files or directories to be backed up are specified on the
# command line, then only the specified files are backed up and this file
# is not read.

INCLUDE_FILE=\$LOGDIR/Include


# EXCLUDE_FILE
# The name of the file containing the list of files to exclude from backups.

EXCLUDE_FILE=\$LOGDIR/Exclude


# GROUP_SIZE and DELAY
# If you specify a GROUP_SIZE greater than 1, and a DELAY greater than
# 0, then it will backup the number of files specified by GROUP_SIZE
# at a time, sleeping DELAY seconds between each group.  This can be
# used to tune the amount of network load when backing up over NFS.
# It will take longer to do the backup but it could be handy if you
# need to do a backup during the day in a high traffic environment and
# don't want to load the network down so much.  It only applies when backing
# up whole directories.  If individual files are specified, it is ignored.

GROUP_SIZE=1
DELAY=0

eof

    chmod u+x "$conf"
    . "$conf"
  fi

  # Anything other than empty, "no" or "off" turns incremental backups on.
  case "$INCREMENTAL" in
    '' | no | off) Incremental=  ;;
    *)             Incremental=1 ;;
  esac

  # -i on the command line inverts the configured default.
  if [ -n "$Toggle_incremental" ]
  then
    if [ -n "$Incremental" ]; then
      Incremental=
    else
      Incremental=1
    fi
  fi

  if [ -z "$Incremental" ]; then
    Inc_opt=
  fi

  # If the backup directory or include file are specified on the command
  # line, then override the setting in $CONFIGURATION_FILE.
  BACKUP_DIR="${Backup_dir:-"$BACKUP_DIR"}"
  INCLUDE_FILE="${Include_file:-"$INCLUDE_FILE"}"

  # The help text only needs the settings, not a log file.
  [ -n "$Help" ] && return

  # The log file is created below, so its directory has to exist already
  # (on a first run it does not).  A failure is left for the caller's own
  # check of $LOGDIR to report.
  [ -d "$LOGDIR" ] || mkdir -p "$LOGDIR" 2> /dev/null

  # Establish a unique log file name in case there are other instances
  # of bu running at the same time and create the log file.

  local n=1
  local log="$LOG"
  while [ -f "$log" ]
  do
    log="$LOG-$n"
    n=$((n + 1))
  done

  touch "$log"
  LOG="$log"
}


# _backup_copy()
# Usage:  _backup_copy file
# Copies one file, with its full path, below $BACKUP_DIR using $Cp and
# appends cp's verbose output to $LOG.progress (and shows it on stdout in
# verbose mode).  Private helper of backup().

_backup_copy()
{
  # $Inc_opt is deliberately unquoted: it is either empty or one option.
  # stdin comes from /dev/null so the copy can never consume the file
  # list that the caller is reading.
  if [ -n "$Verbose" ]; then
    $Cp --parents -avf $Inc_opt "$1" "$BACKUP_DIR" < /dev/null \
      | tee -a "$LOG.progress"
  else
    $Cp --parents -avf $Inc_opt "$1" "$BACKUP_DIR" < /dev/null \
      >> "$LOG.progress"
  fi
}


# backup()
# Usage:  backup {file or directory}
# Backs up the specified file or directory to a backup file system that
# is mounted on $BACKUP_DIR.  All files are filtered through $FILTER (when
# that script exists) to decide whether they should be excluded.
#
# A directory is expanded with find (without crossing file systems) into
# $FILTERED_LIST and every entry is copied.  When GROUP_SIZE is greater
# than 1 and DELAY is greater than 0, it sleeps DELAY seconds after each
# group of GROUP_SIZE files.  Always returns 0.
#
# File names are read with "IFS= read -r" so that names containing
# backslashes or leading/trailing blanks are copied under their real name.

backup()
{
  local file filename
  local count=1
  local throttle=

  # Find the absolute path of the specified file
  case "$1" in
    /*) file="$1"        ;;
    *)  file="$(pwd)/$1" ;;
  esac

  file=$(real_path "$file")

  # A single file: filter it and copy it, no grouping or delay.
  if [ ! -d "$file" ]
  then
    if [ -f "$FILTER" ]; then
      file=$(printf '%s\n' "$file" | "$FILTER" 2> /dev/null)
    fi
    [ -z "$file" ] && return 0

    _backup_copy "$file"
    return 0
  fi

  touch "$FILTERED_LIST"; chmod "$LOG_ACCESS_PERMS" "$FILTERED_LIST"

  if [ -f "$FILTER" ]; then

    # FreeBSD sh bug as of 4.3-20010810-STABLE
    # Prints "sh in free(): warning: chunk is already free." when the
    # following line is executed, so I am redirecting stderr for now
    # so we do not have to see the warning.

    find "$file" -xdev ! -type d -print \
      | "$FILTER" > "$FILTERED_LIST" 2> /dev/null

  else
    find "$file" -xdev ! -type d -print > "$FILTERED_LIST"
  fi

  if [ "$GROUP_SIZE" -gt 1 ] && [ "$DELAY" -gt 0 ]; then
    throttle=1
  fi

  # Backup every file in the filtered list
  while IFS= read -r filename
  do
    [ -n "$filename" ] || continue
    _backup_copy "$filename"

    [ -n "$throttle" ] || continue
    if [ "$count" -lt "$GROUP_SIZE" ]; then
      count=$((count + 1))
    else
      count=1
      sleep "$DELAY"
    fi
  done < "$FILTERED_LIST"

  return 0
}


# bye()
# Usage:  bye [exit-status]
# Finishes the run and exits with the given status (default 0):
#   - after an interrupt, appends what was copied so far to $LOG,
#   - mails $LOG to $Mail_addr when one was given,
#   - removes this run's temp directory and progress file,
#   - removes temp directories left behind by bu processes that are no
#     longer running.
#
# Only directories named $TMPDIR/bu.<digits> are ever considered stale;
# anything else in $TMPDIR (for example "build") is left untouched.

bye()
{
  local rc=${1:-0}
  local bu_procs dir pid

  if [ -n "$Interrupted" ]
  then
    chmod "$LOG_ACCESS_PERMS" "$LOG"
    printf '\n- Interrupted -\n'
    [ -n "$Verbose" ] && printf 'Writing log file\n\n'
    if [ -s "$LOG.progress" ]
    then
      cat << eof >> "$LOG"


Files backed up:
---------------
eof

      # cp -v prints "source -> destination"; keep the source only.
      sed -e 's/-> .*//' "$LOG.progress" >> "$LOG"
    fi

    printf '\n --  Interrupted %s  --\n\n\n' "$(date)" >> "$LOG"
  fi

  if [ -n "$Mail_addr" ]; then
    # $Mail_addr is left unquoted so that several addresses may be given.
    mail -s "bu (BackUp) log for $(hostname -s) [$(date)]" $Mail_addr < "$LOG"
  fi

  rm -rf "$TMP" "$LOG.progress"

  # Remove stale temp files from any bu processes that are no longer running

  bu_procs=$(ps -ax | awk '/[ \/]bu( |$)/ { print $1 }')
  for dir in "$TMPDIR"/bu.*
  do
    pid=${dir##*/bu.}
    case "$pid" in
      '' | *[!0-9]*) continue ;;  # unmatched glob or not one of our dirs
    esac
    if ! printf '%s\n' "$bu_procs" | grep -qxF -- "$pid"; then
      rm -rf "$dir"
    fi
  done

  # With no bu temp directory left, any remaining progress file is stale.
  if ! ls -d "$TMPDIR"/bu.* > /dev/null 2>&1 && [ -n "$LOGDIR" ]; then
    rm -rf "$LOGDIR"/*.progress
  fi

  exit "$rc"
}


# signal()
# Usage:  trap signal 2 15
# Handler for SIGINT and SIGTERM: records that the run was interrupted
# and leaves through bye().
signal()
{
  # From here on SIGINT and SIGTERM are ignored so the clean up in bye()
  # cannot itself be interrupted.
  trap '' INT TERM

  Interrupted=1
  bye
}


# generate_filter()
# Usage: generate_filter Exclude-file
# Parses the specified Exclude file and generates a sed filter stored in
# the script specified by the global variable, $FILTER.
#
# Each non-comment line becomes one sed delete expression.  An entry that
# is an existing directory excludes everything below it; any other entry
# must match the whole path name.  When the Exclude file is missing,
# empty, or holds no patterns, no $FILTER script is left behind.
# Always returns 0.

generate_filter()
{
  local exclude_file="$1"
  local pattern       # Contains the line read from the exclude file
  local regex         # $pattern converted to a regular expression
  local filter=       # flag - at least one pattern was written

  [ -s "$exclude_file" ] || return 0
  set -f # Disable wild card expansion (globbing)
  printf 'sed' > "$FILTER"

  {
    # read is used without -r on purpose so that a backslash can still be
    # used to escape a character in the Exclude file.
    while read pattern
    do
      # Drop comments and quotes, then any blanks left in front of a
      # trailing comment, so "/some/dir   # note" is still seen as the
      # directory /some/dir below.
      pattern=$(printf '%s\n' "$pattern" \
                  | sed -e 's/#.*$//' -e 's/"//g' -e 's/[[:space:]]*$//')

      # Ignore comments and blank lines
      [ -z "$pattern" ] && continue

      # Generate a sed filter from the exclude list
      regex="$(pathname_to_regex "$pattern")"
      if [ -d "$pattern" ]; then
        printf '%s' " -e '/^$regex\\//d'"
      else
        printf '%s' " -e '/^$regex$/d'"
      fi
      filter=1
    done
  } < "$exclude_file" >> "$FILTER"

  set +f # Re-enable globbing

  if [ -n "$filter" ]
  then
    printf '\n' >> "$FILTER"
    chmod 750 "$FILTER"
  else
    rm -f "$FILTER"
  fi

  return 0
}


# parse_cmd_line()
# Usage:  parse_cmd_line "$@"
# Sets the option globals (Backup_dir, CONFIGURATION_FILE, Include_file,
# Mail_addr, Toggle_incremental, Verbose, Help) from the switches and
# writes every other argument, one per line and exactly as given, to
# $SPECIFIED_FILES.  Wild cards in those arguments are NOT expanded here;
# the main routine expands them later.  Empty arguments are ignored.

parse_cmd_line()
{
  rm -f "$SPECIFIED_FILES"

  while [ $# -gt 0 ]
  do
    case "$1" in
      -d) shift; Backup_dir="${1-}"         ;;
      -c) shift; CONFIGURATION_FILE="${1-}" ;;
      -f) shift; Include_file="${1-}"       ;;
      -m) shift; Mail_addr="${1-}"          ;;
      -i) Toggle_incremental=1   ;;
      -q) Verbose= ;;
      -v) ;; # -v used to turn on verbose mode.  Now it is on by default.
      -\? | -h | -help | --help)  Help=1 ;;
      '') ;; # Nothing to record for an empty argument
      *)  printf '%s\n' "$1" >> "$SPECIFIED_FILES" ;;
    esac

    # A switch given without its value has already used up the last
    # argument, so only shift when something is left.
    if [ $# -gt 0 ]; then
      shift
    fi
  done

  return 0
}


# pathname_to_regex
# Usage:  pathname_to_regex pathname
# Prints out the specified path as a regular expression for pattern matching.
# Path names may include standard shell wildcards of '*' and '?'.
# Several arguments are joined with a single blank.
# Example:
#          pathname_to_regex '/foo?/bar/*'    would print
#                            \/foo.\/bar\/.*
#
# The arguments are quoted while they are handed to sed so that the shell
# does not expand the very wildcards that are to be translated.

pathname_to_regex()
{
  # Order matters: literal dots are escaped before '*' and '?' introduce
  # new dots.  '?' is matched with a bracket expression because "\?" is
  # not portable in basic regular expressions.
  printf '%s\n' "$*" | sed -e 's/\./\\./g' \
                           -e 's/\*/.*/g'  \
                           -e 's/[?]/./g'  \
                           -e 's|/|\\/|g'
}


# real_path()
# Usage:  real_path file|directory
# Prints the real absolute path of the file or directory after converting any
# symbolic links in the path to the directory they point to.
#
# The path is printed exactly as resolved: white space and wild card
# characters in a component are preserved.  A chain of links at the end
# of the path is followed for at most 64 hops so that circular links
# cannot hang the script.  Link targets are taken from "ls -l" output, so
# a name that itself contains " -> " is not handled.

real_path()
{
  local path="$1"
  local dir link component
  local resolved_path=
  local saved_ifs
  local restore_glob=
  local hops=0

  if [ "$path" = "/" ]
  then
    printf '%s\n' "/"
    return
  fi

  # Remove the trailing slash if any
  path=${path%/}

  # If path is a sym link, follow the links until we find the real
  # directory it points to.

  while [ -L "$path" ] && [ "$hops" -lt 64 ]
  do
    link=$(command ls -l -- "$path" | awk -F" -> " '{print $2}')
    case "$link" in
      /*) path="$link" ;;
      *)
        # A relative target is relative to the directory holding the link
        dir=$(dirname "$path")
        [ "$dir" = "/" ] && dir=
        path="$dir/$link"
        ;;
    esac
    hops=$((hops + 1))
  done

  # Split the path on '/' only, and without wild card expansion, so each
  # component is used literally.  Globbing is switched back on afterwards
  # only if it was on before.
  saved_ifs="$IFS"
  case $- in
    *f*) ;;
    *)   restore_glob=1; set -f ;;
  esac
  IFS='/'

  # Now resolve any other sym links that may be embedded in the path.
  for component in $path
  do
    [ -z "$component" ] && continue
    resolved_path="$resolved_path/$component"
    if [ -L "$resolved_path" ]
    then
      link=$(command ls -l -- "$resolved_path" | awk -F" -> " '{print $2}')
      case "$link" in
        /*) resolved_path="$link" ;;
        *)
          dir=$(dirname "$resolved_path")
          [ "$dir" = "/" ] && dir=
          resolved_path="$dir/$link"
          ;;
      esac
    fi
  done

  IFS="$saved_ifs"
  if [ -n "$restore_glob" ]; then
    set +f
  fi

  printf '%s\n' "$resolved_path"
}


### Main routine ###

# Start from an empty private temp directory.
if [ -d "$TMP" ]; then
  rm -rf "$TMP" || bye 1
fi
mkdir -p "$TMP" || bye 1

trap "" 1         # Ignore SIGHUP
trap signal 2 15  # Try to clean up and exit gracefully after SIGINT or SIGTERM

parse_cmd_line "$@"
configure

# usage exits, so nothing below runs for the help switch.
[ -n "$Help" ] && usage

if [ ! -d "$LOGDIR" ] && ! mkdir -p "$LOGDIR"; then
  bye 1
fi

if [ ! -d "$BACKUP_DIR" ]
then
  # printf rather than "echo -e": not every /bin/sh echo understands -e.
  printf 'bu: Backup directory, %s\n    does not exist.\n' "$BACKUP_DIR" >&2
  bye 1
fi

# bu needs the GNU cp (for --parents).  Use "cp" when it is the GNU one,
# otherwise fall back to "gcp".  GNU cp identifies itself as part of
# "fileutils" in old releases and of "coreutils" in current ones, so both
# names are accepted.
if ! cp --version 2> /dev/null | grep -Eq 'fileutils|coreutils'; then
  if gcp --version 2> /dev/null | grep -Eq 'fileutils|coreutils'
  then
    Cp=gcp
  else
    cat << eof >&2

Error: bu requires the GNU cp command.  It is available in the GNU coreutils
       (formerly fileutils) package (http://www.gnu.org).

eof
    bye 1
  fi
fi

# When no files or directories were given on the command line
# ($SPECIFIED_FILES is missing or empty) and the include file has content,
# use the include file instead, with comments and blank lines removed.
# NOTE(review): "\t" inside the bracket expression is only a tab for sed
# implementations that support that escape (GNU sed does) - confirm for
# the target platform.
if [ ! -s $SPECIFIED_FILES  -a  -s $INCLUDE_FILE ]; then
  cat $INCLUDE_FILE | sed -e 's/#.*$//' -e '/^[ \t]*$/d' > $SPECIFIED_FILES
fi

# Start with a fresh expanded list.
[ -f "$FILE_LIST" ] && rm -f $FILE_LIST

# Generate a file list from the specified file name patterns with wild
# cards expanded.
{
  while read file
  do
    # Strip off quotes if any
    file=$(echo "$file" | sed 's/"//g')

    # Insert backslashes in front of spaces, in case any of the filenames
    # have spaces in them.
    file=$(echo "$file" | sed 's/ /\\ /g')

    # Store the list of all files and directories that match the pattern.
    # eval re-parses the entry, which is what expands the wild cards while
    # keeping the backslash-escaped spaces inside one name.  Patterns that
    # match nothing are dropped silently (ls errors go to /dev/null).
    # NOTE(review): eval also interprets any other shell metacharacters in
    # the entry (";", "\$(...)", quotes, ...), so entries must come from a
    # trusted command line / include file.
    eval ls -1 -d -- $file >> $FILE_LIST 2> /dev/null
  done
} < $SPECIFIED_FILES

# Human readable form of the incremental flag for the banner and the log.
if [ -n "$Incremental" ]; then
  on_off=on
else
  on_off=off
fi

# Banner on the terminal, verbose mode only.
[ -n "$Verbose" ] && cat << eof

Incremental:      $on_off 
Backup directory: $BACKUP_DIR

eof

# Make entries in the log file: the run's settings followed by the list
# of files and directories that were asked for.

Start_date=$(date)
cat << eof > $LOG

Hostname:         $(hostname)
Incremental:      $on_off 
Backup directory: $BACKUP_DIR
Backup started:   $Start_date

Specified files or directories:
------------------------------
$(cat $SPECIFIED_FILES)
eof

# Backup the files
#
# Every entry of the expanded file list is handed to backup().  The list
# is read with "IFS= read -r" so names containing backslashes or leading /
# trailing blanks reach backup() unchanged, and an empty line is skipped
# instead of ending the run.

if [ -s "$FILE_LIST" ]
then
  generate_filter "$EXCLUDE_FILE"
  touch "$LOG.progress"
  chmod "$LOG_ACCESS_PERMS" "$LOG.progress"

  while IFS= read -r file
  do
    [ -n "$file" ] || continue
    backup "$file"
  done < "$FILE_LIST"
fi

# The backup is finished, so disable kill signals to make sure we are able
# to write the log.
trap "" 2 15

# Re-write the log file showing the finish time and list of files backed up

cat << eof > "$LOG"

Hostname:         $(hostname)
Incremental:      $on_off 
Backup directory: $BACKUP_DIR
Backup started:   $Start_date
Backup finished:  $(date)

Specified files or directories:
------------------------------
$(cat "$SPECIFIED_FILES")
eof

chmod "$LOG_ACCESS_PERMS" "$LOG"

cat << eof >> "$LOG"


Files backed up:
---------------
eof

# printf is used instead of "echo -e", which not every /bin/sh supports.
if [ ! -s "$LOG.progress" ]
then
  printf 'No new or changed files to back up\n\n' >> "$LOG"
  [ -n "$Verbose" ] && printf 'No new or changed files to back up\n\n'
else
  # cp -v prints "source -> destination"; the log keeps the source only.
  sed -e 's/-> .*//' "$LOG.progress" >> "$LOG"
  echo >> "$LOG"
fi

[ -n "$Verbose" ] && echo
bye

