/*
**      CmpSize
**
**      Copyright  1998 Dieter Stolte
**                       All rights reserved.
**
**      $VER: cmpsize.c 0.2 (30.08.98) $
**
*/

/* TODO: recursive directories, ... */

/* ============================================================================================= */

/* Includes */

#include <sys/types.h>
#include <sys/stat.h>

#include <ctype.h>
#include <dirent.h>
#include <grp.h>
#include <limits.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/* ============================================================================================= */

/* Defines */

#define VERSION "0.2"
#define BUFSIZE 16384

/* ============================================================================================= */

/* TypeDefs */

/*
** List node holding everything that is remembered about one file.
**
** Nodes with different sizes are linked through "next" in ascending
** size order; further files of the same size hang off the first such
** node through "samenext".
*/
struct  Nodes
{
        char                *name;     /* file name, heap copy that is freed together with the node */
        mode_t               mode;     /* protection flags (st_mode as delivered by stat()) */
        uid_t                uid;      /* owner */
        gid_t                gid;      /* group */
        off_t                length;   /* size in bytes */
        time_t               mtime;    /* modify time */
        int                  holdcnt;  /* running number of the directory argument the file came from */
        struct Nodes        *samenext; /* next file with the same size, NULL at the end */
        struct Nodes        *next;     /* next node with a bigger size, NULL at the end */
};

/* ============================================================================================= */

/****** Cmpsize/NewStrDup ***************************************************
*
*   NAME
*       NewStrDup --  Duplicate A String (V1)
*
*   SYNOPSIS
*       string pointer of new string = NewStrDup( original string )
*
*       char * NewStrDup( const char * )
*
*   FUNCTION
*       Allocates memory for a string and copies the original string,
*       including its terminating '\0', into this buffer. The caller has
*       to call free() to give the memory back to the system.
*
*   INPUTS
*       Pointer to original string (may be NULL)
*
*   RESULT
*       Pointer to the duplicated string, or NULL if the input was NULL
*       or no memory could be allocated
*
*   NOTES
*       Because the original function strdup() is not ansi standard and
*       some systems may not have it, this function has to be used.
*
*   SEE ALSO
*       K&R
*
*****************************************************************************
*
*   status ready
*/

char * NewStrDup ( const char *s )
{
char   *ns;
size_t  size;

  /* strlen() on a NULL pointer is undefined, so refuse it up front */
  if ( s == NULL )
    return NULL;

  size = strlen ( s ) + 1;      /* + 1 for the terminating '\0' */
  ns   = malloc ( size );

  if ( ns != NULL )
    memcpy ( ns, s, size );

  return ns;
}

/* ============================================================================================= */

/****** Cmpsize/YesNo *******************************************************
*
*   NAME
*       YesNo --  Wait For YES Or NO Answer (V1)
*
*   SYNOPSIS
*       bool = YesNo( void )
*
*       int YesNo( void )
*
*   FUNCTION
*       Reads one line from stdin (up to '\n' or end of input). Leading
*       white space is skipped. The answer is YES only if exactly one
*       character remains and it is one of Y, y, J, j. Everything else,
*       including an empty line or end of input, is a NO.
*
*   INPUTS
*       %
*
*   RESULT
*       0 = YES, 1 = NO
*
*   NOTES
*       White space after the answer counts as an extra character, so
*       "y " is a NO.
*
*   SEE ALSO
*       %
*
*****************************************************************************
*
*   status ready
*/

int
YesNo ()
{
  int first = '\0';     /* first character after the leading white space */
  int seen  = 0;        /* 0 = nothing yet, 1 = exactly one char, 2 = more than one */
  int ch;

  /* Always consume the complete line, whatever it contains */
  for ( ch = getchar (); ch != EOF && ch != '\n'; ch = getchar () )
  {
    if ( seen != 0 )
    {
      seen = 2;
      continue;
    }

    if ( isspace ( ch ) )
      continue;

    first = ch;
    seen  = 1;
  }

  if ( seen != 1 )
    return 1;

  switch ( first )
  {
    case 'Y':
    case 'y':
    case 'J':
    case 'j':
      return 0;

    default:
      return 1;
  }
}

/* ============================================================================================= */

/*
** rwx -- translate the owner permission bits into three characters
**
** Writes 'r', 'w' and 'x' (or '-' for a cleared bit) to chars[0..2]
** depending on S_IRUSR, S_IWUSR and S_IXUSR in bits. No terminating
** '\0' is written.
*/
void
rwx ( unsigned short bits, char *chars )
{
  static const char    letter[3] = { 'r', 'w', 'x' };
  const unsigned short mask[3]   = { S_IRUSR, S_IWUSR, S_IXUSR };
  int                  i;

  for ( i = 0; i < 3; i++ )
  {
    if ( bits & mask[i] )
      chars[i] = letter[i];
    else
      chars[i] = '-';
  }
}

/* ============================================================================================= */

void
PrintMode ( umode_t mode, uid_t uid, gid_t gid )
{
  char            modebuf[10];
  struct passwd  *pwstruct;
  struct group   *gpstruct;

  rwx ( ( mode & 0700 ) << 0, &modebuf[0]);
  rwx ( ( mode & 0070 ) << 3, &modebuf[3]);
  rwx ( ( mode & 0007 ) << 6, &modebuf[6]);

  if ( mode & S_ISUID )
    {
      if ( modebuf[2] != 'x' )
        /* Set-uid, but not executable by owner.  */
        modebuf[2] = 'S';
      else
        modebuf[2] = 's';
    }
  if ( mode & S_ISGID )
    {
      if ( modebuf[5] != 'x' )
        /* Set-gid, but not executable by group.  */
        modebuf[5] = 'S';
      else
        modebuf[5] = 's';
    }
  if ( mode & S_ISVTX )
    {
      if ( modebuf[8] != 'x' )
        /* Sticky, but not executable by others.  */
        modebuf[8] = 'T';
      else
        modebuf[8] = 't';
    }

  modebuf[9]='\0';

  printf ( "-%s ", modebuf );

  pwstruct = getpwuid ( uid );

  if ( pwstruct != NULL )
  {
    printf ( "%-8.8s ", pwstruct->pw_name );
  }

  gpstruct = getgrgid ( gid );

  if ( gpstruct != NULL )
  {
    printf ( "%-8.8s ", gpstruct->gr_name );
  }
}

/* ============================================================================================= */

/*
** PrintTime -- print a modify time the way "ls -l" does
**
** Times older than about six months or more than one hour in the
** future are printed with the year ("%b %e  %Y"), all others with the
** time of day ("%b %e %H:%M"). Nothing is printed if the time cannot
** be converted to local time or formatted.
*/
void
PrintTime ( time_t mtime )
{
  char        timebuf[50];
  const char *fmt;
  struct tm  *tmptr;
  double      age;              /* seconds from mtime until now */
  size_t      anz;

  /* difftime() avoids overflowing time_t for extreme mtime values */
  age = difftime ( time ( ( time_t * ) 0 ), mtime );

  if ( age > 6.0 * 30.0 * 24.0 * 60.0 * 60.0     /* Old. */
      || age < -60.0 * 60.0 )                     /* In the future. */
  {
    fmt = "%b %e  %Y";
  }
    else
  {
    fmt = "%b %e %H:%M";
  }

  /* localtime() returns NULL for times it cannot represent */
  tmptr = localtime ( &mtime );

  if ( tmptr == NULL )
  {
    return;
  }

  anz = strftime ( timebuf, sizeof timebuf, fmt, tmptr );

  if ( anz > 0 )
  {
    printf ( "%s", timebuf );
  }
}

/* ============================================================================================= */

/****** Cmpsize/InitList ***************************************************
*
*   NAME
*       InitList --  Init A New List (V1)
*
*   SYNOPSIS
*       Pointer to head of list = InitList( )
*
*       struct Nodes * InitList( void )
*
*   FUNCTION
*       Delivers the head pointer of a new, empty list. Currently only a
*       placeholder for further development.
*
*   INPUTS
*       %
*
*   RESULT
*       NULL (the empty list)
*
*   NOTES
*       %
*
*   SEE ALSO
*       %
*
*****************************************************************************
*
*   status ready
*/

struct Nodes *InitList ( void )
{
  struct Nodes *emptylist = NULL;       /* an empty list has no head node */

  return emptylist;
}

/* ============================================================================================= */

/****** Cmpsize/PutList *****************************************************
*
*   NAME
*       PutList --  Put New Node In List (V1)
*
*   SYNOPSIS
*       Pointer to head of list = PutList( Pointer to head of list,
*                                          Filename, Protection, UID, GID,
*                                          Size, Modify time, Dir number   )
*
*       struct Nodes * PutList( struct Nodes *, ... )
*
*   FUNCTION
*       The new node will be sorted by size in the single linked list in
*       ascending order.
*
*   INPUTS
*       The head of the list, the name, file info and the dir number.
*
*   RESULT
*       On success new pointer to list, NULL otherwise
*
*   NOTES
*       %
*
*   SEE ALSO
*       %
*
*****************************************************************************
*
*   status ready
*/

struct Nodes * PutList ( struct Nodes *nodeptr,
                         char         *newname,
                         umode_t       newmode,
                         uid_t         newuid,
                         gid_t         newgid,
                         off_t         newlength,
                         time_t        newmtime,
                         int           newholdcnt )
{
struct Nodes     *posptr,
                 *memptr;
char             *name;

  posptr = nodeptr;

  name = NewStrDup ( newname );

  if ( name == NULL )
  {
    /* Error */
    fprintf ( stderr, "error: malloc ...\n" );
    exit( 1 );
  }

  memptr = ( struct Nodes * )malloc ( sizeof ( struct Nodes ) );

  if ( memptr == NULL )
  {
    /* No Memory */
    fprintf ( stderr, "error: malloc ...\n" );
    free ( name );
    exit( 1 );
  }
  else
  {
    if ( posptr == NULL )
    {
      /* List Empty */
      memptr->name        = name;
      memptr->mode        = newmode;
      memptr->uid         = newuid;
      memptr->gid         = newgid;
      memptr->length      = newlength;
      memptr->mtime       = newmtime;
      memptr->holdcnt     = newholdcnt;
      memptr->next        = NULL;
      memptr->samenext    = NULL;

      return memptr;
    }
    else
    {
      while ( 1 )
      {
        if ( posptr->length == newlength )
        {
          while ( posptr->samenext != NULL )
          {
            posptr = posptr->samenext;
          }
          /* Double size */
          memptr->name        = name;
          memptr->mode        = newmode;
          memptr->uid         = newuid;
          memptr->gid         = newgid;
          memptr->length      = newlength;
          memptr->mtime       = newmtime;
          memptr->holdcnt     = newholdcnt;
          memptr->next        = NULL;
          memptr->samenext    = NULL;

          posptr->samenext    = memptr;
          return nodeptr;
        }

        if ( posptr->length > newlength )
        {
          memptr->name        = posptr->name;
          memptr->mode        = posptr->mode;
          memptr->uid         = posptr->uid;
          memptr->gid         = posptr->gid;
          memptr->length      = posptr->length;
          memptr->mtime       = posptr->mtime;
          memptr->holdcnt     = posptr->holdcnt;
          memptr->next        = posptr->next;
          memptr->samenext    = posptr->samenext;

          posptr->name        = name;
          posptr->mode        = newmode;
          posptr->uid         = newuid;
          posptr->gid         = newgid;
          posptr->length      = newlength;
          posptr->mtime       = newmtime;
          posptr->holdcnt     = newholdcnt;
          posptr->next        = memptr;
          posptr->samenext    = NULL;

          return nodeptr;
        }

        if ( posptr->next == NULL )
        {
          memptr->name        = name;
          memptr->mode        = newmode;
          memptr->uid         = newuid;
          memptr->gid         = newgid;
          memptr->length      = newlength;
          memptr->mtime       = newmtime;
          memptr->holdcnt     = newholdcnt;
          memptr->next        = NULL;
          memptr->samenext    = NULL;

          posptr->next        = memptr;
          return nodeptr;
        }
        posptr = posptr->next;
      }
    }
  }
  return nodeptr;
}

/* ============================================================================================= */

/****** Cmpsize/DelList ****************************************************
*
*   NAME
*       DelList --  Deletes List From Memory (V1)
*
*   SYNOPSIS
*       void = DelList( Pointer to head of list )
*
*       void DelList( struct Nodes * )
*
*   FUNCTION
*       Deletes list from memory: every node, the nodes in its
*       "samenext" chain and all file name strings are freed.
*
*   INPUTS
*       Head of the list (NULL for an empty list is allowed).
*
*   RESULT
*       %
*
*   NOTES
*       The list must not be used any more after this call.
*
*   SEE ALSO
*       %
*
*****************************************************************************
*
*   status ready
*/

void
DelList ( struct Nodes *nodeptr )
{
struct Nodes       *sameptr,
                   *nextsame,
                   *nextnode;

  while ( nodeptr != NULL )
  {
    /* First release the chain of files with the same size */
    sameptr = nodeptr->samenext;

    while ( sameptr != NULL )
    {
      /* Fetch the link before the node is freed */
      nextsame = sameptr->samenext;
      free ( sameptr->name );
      free ( sameptr );
      sameptr = nextsame;
    }

    /* Then the node itself */
    nextnode = nodeptr->next;
    free ( nodeptr->name );
    free ( nodeptr );
    nodeptr = nextnode;
  }
}

/* ============================================================================================= */

/****** Cmpsize/DiffFiles ***************************************************
*
*   NAME
*       DiffFiles --  Compare Two Files Which Have Same Size (V1)
*
*   SYNOPSIS
*       bool = DiffFiles( Filename1, Filename2, Size )
*
*       int DiffFiles( const char *file1, const char *file2, off_t size )
*
*   FUNCTION
*       Compare the contents of two files block by block.
*
*   INPUTS
*       Filenames and size. The size is currently not used, the files
*       are simply read until their end.
*
*   RESULT
*        0 = files are same,
*        1 = files are different,
*       -1 = error (no memory, a file could not be opened or read)
*
*   NOTES
*       A read error is reported as -1 and never as "same", so a file
*       that cannot be read is not treated as a duplicate.
*
*   SEE ALSO
*       %
*
*****************************************************************************
*
*   status ready
*/

int
DiffFiles ( const char *file1, const char *file2, off_t size )
{
  FILE              *filehd1 = NULL,
                    *filehd2 = NULL;
  unsigned char     *buffer1,
                    *buffer2;
  size_t             readlen1,
                     readlen2;
  int                result = -1;       /* stays -1 on every error path */

  ( void ) size;                        /* kept for the interface only */

  /* Get buffer mem for files */
  buffer1 = malloc ( BUFSIZE );
  buffer2 = malloc ( BUFSIZE );

  if ( ( buffer1 == NULL ) || ( buffer2 == NULL ) )
  {
    goto cleanup;
  }

  /* Open the files to check; the second one only if the first one is
     there, so no open file is left behind */
  filehd1 = fopen ( file1, "rb" );

  if ( filehd1 == NULL )
  {
    goto cleanup;
  }

  filehd2 = fopen ( file2, "rb" );

  if ( filehd2 == NULL )
  {
    goto cleanup;
  }

  /* Read data from the files while not EOF */
  for ( ;; )
  {
    readlen1 = fread ( buffer1, 1, BUFSIZE, filehd1 );
    readlen2 = fread ( buffer2, 1, BUFSIZE, filehd2 );

    if ( ( ferror ( filehd1 ) != 0 ) || ( ferror ( filehd2 ) != 0 ) )
    {
      /* A short read caused by an error says nothing about the contents */
      break;
    }

    if ( ( readlen1 != readlen2 ) ||
         ( memcmp ( buffer1, buffer2, readlen1 ) != 0 ) )
    {
      /* Files are different (in length or in contents) */
      result = 1;
      break;
    }

    if ( ( readlen1 < BUFSIZE ) ||
         ( feof ( filehd1 ) != 0 ) ||
         ( feof ( filehd2 ) != 0 ) )
    {
      /* End of files reached */
      result = 0;
      break;
    }
  }

cleanup:
  if ( filehd2 != NULL )
  {
    fclose ( filehd2 );
  }

  if ( filehd1 != NULL )
  {
    fclose ( filehd1 );
  }

  free ( buffer2 );
  free ( buffer1 );

  return result;
}

/* ============================================================================================= */

/****** Cmpsize/DiffListFiles ***********************************************
*
*   NAME
*       DiffListFiles --  Compare All Files In The List (V1)
*
*   SYNOPSIS
*       void = DiffListFiles( Pointer to head of list,
*                             Remove flag,
*                             Information flag         )
*
*       void DiffListFiles( struct Nodes *nodeptr, int rmflag, int infflag )
*
*   FUNCTION
*       Go through the list and compare the files with same size. If files
*       are equal and remove flag is set, ask for ok to delete one of the
*       equal files. If information flag is set, print protection flags and
*       modify time of the equal files.
*
*   INPUTS
*       Pointer to head of list, remove and information flag.
*
*   RESULT
*       %
*
*   NOTES
*       %
*
*   SEE ALSO
*       %
*
*****************************************************************************
*
*   status ready
*/

void DiffListFiles ( struct Nodes *nodeptr, int rmflag, int infflag )
{
struct Nodes       *posptr,
                   *anchorptr;

  while ( nodeptr != NULL )
  {
    posptr = nodeptr;
    anchorptr = nodeptr;

    /* While there are files with same size check all permutations of them */
    while ( anchorptr->samenext != NULL )
    {
      posptr = anchorptr;

      while ( posptr->samenext != NULL )
      {
        posptr = posptr->samenext;

        /* double detected */
        if ( DiffFiles ( anchorptr->name, posptr->name, anchorptr->length ) == 0 )
        {
          if ( infflag == 0 )
          {
            printf ( "identical files with size: %lu bytes:\n", anchorptr->length );
            printf ( "(dir %2i)  %s\n", anchorptr->holdcnt, anchorptr->name );
            printf ( "(dir %2i)  %s\n", posptr->holdcnt, posptr->name );
	  }
          else
	  {
            printf ( "identical files with size: %lu bytes:\n", anchorptr->length );
            PrintMode ( anchorptr->mode, anchorptr->uid, anchorptr->gid );
            PrintTime ( anchorptr->mtime );
            printf ( " (dir %2i)  %s\n", anchorptr->holdcnt, anchorptr->name );
            PrintMode ( posptr->mode, posptr->uid, posptr->gid );
            PrintTime ( posptr->mtime );
            printf ( " (dir %2i)  %s\n", posptr->holdcnt, posptr->name );
	  }

          if ( rmflag )
	  {
            printf ("delete file (dir %i): %s ? ", anchorptr->holdcnt, anchorptr->name );

            if ( !YesNo ( ) )
	    {
              /* delete file 1 */
              if ( remove ( anchorptr->name ) != 0 )
	      {
                fprintf ( stderr, "could not remove %s\n", anchorptr->name );
	      }
	    }
            else
	    {
              printf ("delete file (dir %i): %s ? ", posptr->holdcnt, posptr->name );

              if ( !YesNo ( ) )
	      {
                /* delete file 2 */
                if ( remove ( posptr->name ) != 0 )
	        {
                  fprintf ( stderr, "could not remove %s\n", posptr->name );
		}
	      }
	    }
	  }
        }
      }
      anchorptr = anchorptr->samenext;
    }
    nodeptr = nodeptr->next;
  }
}

/* ============================================================================================= */

/****** Cmpsize/usage *******************************************************
*
*   NAME
*       usage --  Print Usage (V1)
*
*   SYNOPSIS
*       void = usage( Program name, Exit status )
*
*       void usage( char *name, int x )
*
*   FUNCTION
*       Print version, command line syntax and the list of options to
*       stderr and terminate the program.
*
*   INPUTS
*       Program name (as it shall appear in the usage line) and the exit
*       status.
*
*   RESULT
*       % (the function calls exit() and does not return)
*
*   NOTES
*       %
*
*   SEE ALSO
*       %
*
*****************************************************************************
*
*   status ready
*/

void usage ( char *name, int x )
{
  /* Fixed part of the help text, one entry per output line */
  static const char *const helptext[] =
  {
    "-d  delete one of the double files after asking which\n",
    "-h  help\n",
    "-i  print more information of the files\n",
    "-w  wait after each dir before processing the next\n",
    "-V  version\n\n",
    "mail to: d.stolte@tu-bs.de for bugs or improvements\n"
  };
  size_t line;

  fprintf( stderr, "version: %s\n", VERSION );
  fprintf( stderr, "usage: %s [-dhiwV] dir [dir] [dir] ...\n\n", name );

  for ( line = 0; line < sizeof helptext / sizeof helptext[0]; line++ )
  {
    fputs ( helptext[line], stderr );
  }

  exit ( x );
}

/* ============================================================================================= */

/*
** main -- read the options, collect all regular files of the given
** directories in a list sorted by size, then report (and on request
** delete) the files with identical contents.
**
** Options: -d ask for deletion, -i print more information, -w wait for
** return before each directory, -h help, -V version.
** Exit status is 0 on success and 1 on a usage error or if the start
** directory cannot be determined or entered again.
*/
int main (int argc, char *argv[])
{
  char            *dirname;
  char            pwdname[PATH_MAX+1],    /* directory the program was started in */
                  absname[PATH_MAX+1];    /* directory argument + file name */
  int             c;                      /* int: must be able to hold -1 and EOF */
  int             pathlen;
  size_t          dirlen;
  DIR             *dirptr;
  struct dirent   *direntry;
  struct stat     statbuf;
  int             argcnt,
                  holdflag=0,
                  holdcnt=0,              /* count which dir the file belongs to */
                  infflag=0,              /* print more information ? */
                  rmflag=0;               /* delete files ? */
  struct Nodes    *listptr,
                  *listposptr;

  extern int optind, opterr;              /* getopt vars */

  opterr = 0;

  while ( ( c = getopt ( argc, argv, "dhiwV" ) ) != -1 )
  {
    switch ( c )
    {
      case 'd':
        rmflag = 1;
        break;
      case 'h':
        usage ( argv[0], 0 );
        break;
      case 'i':
        infflag = 1;
        break;
      case 'w':
        holdflag = 1;
        break;
      case 'V':
        fprintf( stderr, "version: %s\n", VERSION );
        exit ( 0 );
        break;
      default:                            /* '?' = unknown option */
        usage ( argv[0], 1 );
        break;
    }
  }

  if ( optind >= argc )
  {
    /* No directory given */
    usage ( argv[0], 1 );
  }

  listptr = InitList ( );

  if ( getcwd ( pwdname, sizeof pwdname ) == NULL )
  {
    fprintf ( stderr, "error: getcwd ...\n" );
    exit ( 1 );
  }

  for ( argcnt = optind; argcnt < argc; argcnt++ )
  {
    /* Relative directory arguments are meant relative to the start
       directory, so go back there first */
    if ( chdir ( pwdname ) != 0 )
    {
      fprintf ( stderr, "error: chdir %s\n", pwdname );
      DelList ( listptr );
      exit ( 1 );
    }

    dirname = argv[argcnt];

    if ( holdflag )
    {
      printf ( "press return to read %s ", dirname );
      fflush ( stdout );
      while ( ( c = getchar ( ) ) != EOF && c != '\n' )
      {
      }
    }

    holdcnt++;

    if ( chdir ( dirname ) != 0 )
    {
      fprintf ( stderr, "error: chdir %s\n", dirname );
      continue;
    }

    dirptr = opendir ( "." );

    if ( dirptr == NULL )
    {
      fprintf ( stderr, "error: opendir ...\n" );
      continue;
    }

    dirlen = strlen ( dirname );

    /* readdir() delivers a pointer to its own storage, nothing to free */
    while ( ( direntry = readdir ( dirptr ) ) != NULL )
    {
      if ( stat ( direntry->d_name, &statbuf ) != 0 )
      {
        fprintf ( stderr, "error: stat ...\n" );
        continue;
      }

      if ( !S_ISREG ( statbuf.st_mode ) )
      {
        continue;
      }

      /* Put the directory argument in front of the name, with exactly
         one '/' between them */
      pathlen = snprintf ( absname, sizeof absname, "%s%s%s",
                           dirname,
                           ( dirlen > 0 && dirname[dirlen - 1] == '/' ) ? "" : "/",
                           direntry->d_name );

      if ( pathlen < 0 || ( size_t ) pathlen >= sizeof absname )
      {
        fprintf ( stderr, "error: path too long: %s/%s\n",
                  dirname, direntry->d_name );
        continue;
      }

      listposptr = PutList ( listptr,
                             absname,
                             statbuf.st_mode,
                             statbuf.st_uid,
                             statbuf.st_gid,
                             statbuf.st_size,
                             statbuf.st_mtime,
                             holdcnt );

      if ( listposptr != NULL )
      {
        listptr = listposptr;
      }
    }

    closedir ( dirptr );
  }

  /* The names in the list start with the directory arguments as given,
     so they are only valid from the start directory */
  if ( chdir ( pwdname ) != 0 )
  {
    fprintf ( stderr, "error: chdir %s\n", pwdname );
    DelList ( listptr );
    exit ( 1 );
  }

  DiffListFiles ( listptr, rmflag, infflag );

  DelList ( listptr );
  return 0;
}
