/***************************************
  $Header: /home/amb/wwwoffle/RCS/purge.c 2.29 2000/10/21 18:46:00 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.6.
  Purge old files from the cache.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1996,97,98,99,2000 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <time.h>
#include <utime.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#include <dirent.h>
#include <unistd.h>

#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
#include <sys/param.h>
#include <sys/mount.h>
#elif (defined(__sun__) && defined(__svr4__)) || defined(__irix__)
#include <sys/statvfs.h>
#elif defined(__osf__)
#include <sys/mount.h>
#else
#include <sys/vfs.h>
#endif

#if (defined(__sun__) && defined(__svr4__)) || defined(__irix__)
#define STATFS statvfs
#else
#define STATFS statfs
#endif

#include "wwwoffle.h"
#include "misc.h"
#include "proto.h"
#include "config.h"
#include "errors.h"


/*+ The file descriptor of the spool directory (used with fchdir() to return to it). +*/
extern int fSpoolDir;


/* Local functions */
static long PurgeFiles(char *proto,char *host,int def_age);
static int what_purge_age(char *proto,char *host);
static int what_purge_age_url(URL *Url);

/*+ Set this to 0 for debugging so that nothing is deleted. +*/
#define DO_DELETE 1

/*+ The current time (set at the start of each purge; see PurgeCache). +*/
static time_t now;

/*+ The number of blocks left of each age.
    Slots 0 ... purge_default_age are indexed by the (scaled) age in days;
    the extra slot purge_default_age+1 counts blocks that are not subject
    to age based purging (directories and files whose age is not positive). +*/
static int *blocks_by_age;

/*+ The scaling factor for the ages in the second pass (-1 means that no second pass is needed). +*/
static double age_scale;

/*+ The pass in the purge (1 or 2). +*/
static int pass;

/*+ The blocksize. +*/
static long int blocksize;

/*+ The configuration file options, looked up once then used. +*/
static int purge_use_mtime;
static int purge_use_url;
static int purge_default_age;

/*
 Note: Since some UNIX versions (Solaris) have st_size/st_blocks different for
       different files I have reverted to using 1+(st_size/blocksize) instead
       of st_blocks for counting the number of blocks in a file and using
       st_size/blocksize for directories (since they are multiples of blocks).
       This gives a small error in some cases but is fast to calculate compared
       to the more accurate value of st_size/blocksize+!!(st_size%blocksize).
*/


/*+ Convert a number of blocks (each of 'blocksize' bytes) into kB.
    Handled carefully to avoid overflows from doing blocks*blocksize/1024
    or any errors from doing (blocks/1024)*blocksize.
    Only valid because blocksize is restricted to 512, 1024 or 2048. +*/
#define Blocks_to_kB(blocks) \
 ((long)((blocksize==512)?((blocks)/2): \
        ((blocksize==1024)?(blocks): \
         (blocks)*2)))


/*++++++++++++++++++++++++++++++++++++++
  Delete the stale tmp.* files from one of the top level spool directories.

  A file is deleted if its name starts with 'tmp.' and its modification time
  is more than 60 seconds before 'now'.  The process is returned to the spool
  directory afterwards, whether or not the purge worked.

  const char *dirname The name of the directory (relative to the spool directory).
  ++++++++++++++++++++++++++++++++++++++*/

static void purge_tmp_files(const char *dirname)
{
 if(chdir(dirname))
    PrintMessage(Warning,"Cannot change to directory '%s' [%!s]; not purged.",dirname);
 else
   {
    DIR *dir=opendir(".");

    if(!dir)
       PrintMessage(Warning,"Cannot open directory '%s' [%!s]; not purged.",dirname);
    else
      {
       struct dirent *ent=readdir(dir);

       if(!ent)
          PrintMessage(Warning,"Cannot read directory '%s' [%!s]; not purged.",dirname);
       else
          do
            {
             struct stat buf;

             /* The name filter also rejects '.' and '..', wherever readdir() returns them. */

             if(strncmp(ent->d_name,"tmp.",4))
                continue;

             if(!stat(ent->d_name,&buf) && buf.st_mtime<(now-60))
               {
#if DO_DELETE
                if(unlink(ent->d_name))
                   PrintMessage(Warning,"Cannot unlink file '%s/%s' [%!s].",dirname,ent->d_name);
#else
                PrintMessage(Debug,"unlink(%s/%s).",dirname,ent->d_name);
#endif
               }
            }
          while((ent=readdir(dir)));

       closedir(dir);
      }
   }

 fchdir(fSpoolDir);
}


/*++++++++++++++++++++++++++++++++++++++
  Purge files from the cache that meet the age criteria.

  The first pass deletes files that are older than their configured age and
  builds a profile of the remaining blocks by age.  If the cache size or disk
  free limits are exceeded then a scaling factor for the ages is calculated
  and a second pass is made using the scaled ages.  Finally the old tmp.* files
  in the 'outgoing' and 'temp' directories are removed.

  A report of what was done is written to the client.

  int fd the file descriptor of the wwwoffle client.
  ++++++++++++++++++++++++++++++++++++++*/

void PurgeCache(int fd)
{
 int i,p;
 int nslots;
 int cache_size_mb,disk_free_mb;
 long diskfree;
 struct STATFS sbuf;
 struct stat buf;

 purge_default_age=what_purge_age("*","*");
 purge_use_mtime=ConfigBoolean(PurgeUseMTime);
 purge_use_url=ConfigBoolean(PurgeUseURL);
 cache_size_mb=ConfigInteger(PurgeCacheSize);
 disk_free_mb=ConfigInteger(PurgeDiskFree);

 /* NOTE(review): 'now' is ten minutes ahead of the real time; presumably to
    tolerate files that are written while the purge runs - confirm. */

 now=time(NULL)+600;

 age_scale=-1;

 /* One slot per day of age (0 ... default age) plus one for the unpurgeable blocks.
    calloc() zeroes the slots; refuse to continue rather than index an array that
    could not be allocated or that would have no slots at all. */

 nslots=purge_default_age+2;

 blocks_by_age=(nslots>0)?(int*)calloc((size_t)nslots,sizeof(int)):NULL;

 if(!blocks_by_age)
   {
    PrintMessage(Warning,"Cannot allocate memory for the age profile (default age %d); cache not purged.",purge_default_age);

    purge_tmp_files("outgoing");
    purge_tmp_files("temp");

    return;
   }

 /* Work out the block size from the spool directory itself. */

 if(stat(".",&buf) || buf.st_size==-1 || buf.st_blocks==-1)
   {
    PrintMessage(Warning,"Cannot determine the disk block size [%!s]; using 1024 instead.");
    blocksize=1024;
   }
 else if(buf.st_blocks==0)
   {
    PrintMessage(Warning,"The number of blocks (0) looks wrong; using 1024 for blocksize.");
    blocksize=1024;
   }
 else
   {
    blocksize=buf.st_size/buf.st_blocks;

    if(blocksize!=512 && blocksize!=1024 && blocksize!=2048)
      {
       /* blocksize is a long so it must be printed with %ld. */
       PrintMessage(Warning,"The blocksize (%ld) looks wrong; using 1024 instead.",blocksize);
       blocksize=1024;
      }
   }

 /* Note: blocksize can be only 512, 1024 or 2048. */

 /* Work out the free disk space, measured in blocks of 'blocksize' bytes. */

 if(STATFS(".",&sbuf) || sbuf.f_bsize==-1 || sbuf.f_bavail==-1)
   {
    PrintMessage(Warning,"Cannot determine the disk free space [%!s]; assuming 0.");
    diskfree=0;
   }
 else
   {
    int bs=blocksize,dbs=sbuf.f_bsize;

    /* Do this carefully to stop overflow and reduce errors. */

    while(!(dbs&1) && !(bs&1))  /* remove powers of 2. */
      {dbs>>=1;bs>>=1;}

    /* If both were powers of 2 then there should be no problem (either dbs or bs is now 1).
       If not then I am assuming that sbuf.f_bavail is larger than dbs so the error is smaller. */

    diskfree=dbs*(sbuf.f_bavail/bs);
   }

 for(pass=1;pass<=2;pass++)
   {
    long total_blocks=0;
    int total_dirs=0,dir_blocks;

    write_string(fd,"\n");

    if(cache_size_mb)
      {
       if(pass==1)
          write_string(fd,"Pass 1: Checking dates and sizes of files:\n");
       else
          write_string(fd,"Pass 2: Purging files down to specified size:\n");
      }
    else
       write_string(fd,"Checking dates of files:\n");

    if(pass==1)
      {
       if(purge_use_mtime)
          write_string(fd,"  (Using modification time.)\n");
       else
          write_string(fd,"  (Using last access time.)\n");

       if(purge_use_url)
          write_string(fd,"  (Using the full URL.)\n");
       else
          write_string(fd,"  (Using the hostname and protocol only.)\n");
      }

    write_string(fd,"\n");

    for(p=0;p<NProtocols;p++)
      {
       DIR *dir;
       struct dirent* ent;
       struct stat protobuf;
       char *proto=Protocols[p].name;

       /* Open the spool directory. */

       if(stat(proto,&protobuf))
         {PrintMessage(Inform,"Cannot stat directory '%s' [%!s]; not purged",proto);continue;}

       dir_blocks=protobuf.st_size/blocksize;

       total_blocks+=dir_blocks;
       blocks_by_age[purge_default_age+1]+=dir_blocks;

       if(chdir(proto))
         {PrintMessage(Warning,"Cannot change to directory '%s' [%!s]; not purged.",proto);continue;}

       dir=opendir(".");
       if(!dir)
         {PrintMessage(Warning,"Cannot open directory '%s' [%!s]; not purged.",proto);fchdir(fSpoolDir);continue;}

       ent=readdir(dir);
       if(!ent)
         {PrintMessage(Warning,"Cannot read directory '%s' [%!s]; not purged.",proto);closedir(dir);fchdir(fSpoolDir);continue;}

       /* Search through all of the sub directories. */

       do
         {
          struct stat hostbuf;

          /* The order that readdir() returns entries is not specified so '.' and '..'
             must be skipped by name; treating them as host directories would purge
             the protocol directory and the spool directory themselves. */

          if(!strcmp(ent->d_name,".") || !strcmp(ent->d_name,".."))
             continue;

          if(stat(ent->d_name,&hostbuf))
             PrintMessage(Inform,"Cannot stat directory '%s/%s' [%!s]; race condition?",proto,ent->d_name);
          else if(S_ISDIR(hostbuf.st_mode))
            {
             long file_blocks;
             int age;

             age=what_purge_age(proto,ent->d_name);

             file_blocks=PurgeFiles(proto,ent->d_name,age);
             dir_blocks=hostbuf.st_size/blocksize;

             if(file_blocks==-1)
               {
                /* Nothing is left in the directory so remove it. */
#if DO_DELETE
                if(rmdir(ent->d_name))
                   PrintMessage(Warning,"Cannot delete what should be an empty directory '%s/%s' [%!s].",proto,ent->d_name);
#else
                PrintMessage(Debug,"rmdir(%s/%s).",proto,ent->d_name);
#endif
               }
             else
               {
                struct utimbuf utbuf;

                /* Put back the directory timestamps that were read before purging. */

                utbuf.actime=hostbuf.st_atime;
                utbuf.modtime=hostbuf.st_mtime;
                utime(ent->d_name,&utbuf);

                blocks_by_age[purge_default_age+1]+=dir_blocks;
                total_blocks+=file_blocks+dir_blocks;
                total_dirs++;
               }

             if(purge_use_url)
               {
                if(file_blocks==-1)
                   write_formatted(fd,"Purged %6s://%-32s ; (empty) - deleted\n",proto,ent->d_name);
                else
                   write_formatted(fd,"Purged %6s://%-32s ; %5ld kB\n",proto,ent->d_name,Blocks_to_kB(file_blocks+dir_blocks));
               }
             else
               {
                if(pass==2 && age>0)
                   age=(int)(age*age_scale+0.5);

                if(age<0)
                   write_formatted(fd,"Not Purged       %6s://%-32s ; %5ld kB\n",proto,ent->d_name,Blocks_to_kB(file_blocks+dir_blocks));
                else if(file_blocks==-1)
                   write_formatted(fd,"Purged (%2d days) %6s://%-32s ; (empty) - deleted\n",age,proto,ent->d_name);
                else
                   write_formatted(fd,"Purged (%2d days) %6s://%-32s ; %5ld kB\n",age,proto,ent->d_name,Blocks_to_kB(file_blocks+dir_blocks));
               }
            }
          else
            {
             PrintMessage(Warning,"Found an unexpected file instead of a directory '%s/%s' [%!s]; deleting it.",proto,ent->d_name);

#if DO_DELETE
             if(unlink(ent->d_name))
                PrintMessage(Warning,"Cannot delete the non-directory '%s/%s' [%!s].",proto,ent->d_name);
#else
             PrintMessage(Debug,"unlink(%s/%s).",proto,ent->d_name);
#endif
            }
         }
       while((ent=readdir(dir)));

       closedir(dir);
       fchdir(fSpoolDir);
      }

    write_string(fd,"\n");
    write_formatted(fd,"Total of %d directories ; %ld kB\n",total_dirs,Blocks_to_kB(total_blocks));

    if(pass==1)
      {
       int age_for_size=-1,age_for_free=-1;

       /* Kept as long (like diskfree and total_blocks) so that large caches are not truncated. */
       long age_blocks_used=blocks_by_age[purge_default_age+1];
       long age_blocks_free=diskfree+total_blocks-blocks_by_age[purge_default_age+1];

       write_string(fd,"\n");
       write_string(fd,"Age Profile of cached pages:\n");
       write_formatted(fd,"  (All ages scaled to the range 0 -> %d (default age).)\n",purge_default_age);
       write_string(fd,"\n");

       write_formatted(fd,"Total not purged   ; %5ld kB (%6ld kB free)\n",
                       Blocks_to_kB(age_blocks_used),Blocks_to_kB(age_blocks_free));
       write_string(fd,"\n");

       /* Accumulate from the newest files to the oldest; the first age at which
          a limit is broken is the cutoff age for that limit. */

       for(i=0;i<=purge_default_age;i++)
         {
          age_blocks_used+=blocks_by_age[i];
          age_blocks_free-=blocks_by_age[i];

          if(cache_size_mb && age_for_size<0 &&
             Blocks_to_kB(age_blocks_used)>(1024*cache_size_mb))
            {
             age_for_size=i;

             write_formatted(fd,"Cutoff Age is %2d days for %3d MB cache size\n",age_for_size,cache_size_mb);
            }

          if(disk_free_mb && diskfree && age_for_free<0 &&
             Blocks_to_kB(age_blocks_free)<(1024*disk_free_mb))
            {
             age_for_free=i;

             write_formatted(fd,"Cutoff Age is %2d days for %3d MB disk free\n",age_for_free,disk_free_mb);
            }

          if(i==purge_default_age)
             write_formatted(fd,"Total all ages     ; %5ld kB (%6ld kB free)\n",
                             Blocks_to_kB(age_blocks_used-blocks_by_age[purge_default_age+1]),
                             diskfree?Blocks_to_kB(age_blocks_free):0);
          else
             write_formatted(fd,"Newer than %2d day%c ; %5ld kB (%6ld kB free)\n",i+1,i?'s':' ',
                             Blocks_to_kB(age_blocks_used-blocks_by_age[purge_default_age+1]),
                             diskfree?Blocks_to_kB(age_blocks_free):0);
         }

       /* The smaller of the two cutoff ages decides the scaling for the second pass. */

       if(purge_default_age)
         {
          if(age_for_size!=-1 && (age_for_size<=age_for_free || age_for_free==-1))
             age_scale=(double)age_for_size/(double)purge_default_age;
          else if(age_for_free!=-1 && (age_for_free<age_for_size || age_for_size==-1))
             age_scale=(double)age_for_free/(double)purge_default_age;
         }
       else if(age_for_size!=-1 || age_for_free!=-1)
          age_scale=0;
      }

    /* No limit was broken so there is no need for a second pass. */

    if(age_scale==-1)
       break;
   }

 write_string(fd,"\n");

 free(blocks_by_age);
 blocks_by_age=NULL;

 /* Purge the tmp.* files in outgoing and temp. */

 purge_tmp_files("outgoing");
 purge_tmp_files("temp");
}


/*++++++++++++++++++++++++++++++++++++++
  Delete the files in one host directory that are older than the specified age.

  Must be called with the protocol directory as the current directory; the
  process is back in the protocol directory when the function returns (unless
  the directory changes themselves fail).

  Cached pages are stored as a pair of files whose names differ only in the
  first character ('U' or 'D').  The pair is handled when the 'D' file is seen;
  a 'U' file with a matching 'D' file is skipped, a file without its partner or
  with any other name is deleted.

  long PurgeFiles Returns the number of blocks in files that are left,
                  -1 if no files are left (the caller then removes the directory)
                  or 1 if the directory could not be entered or read.

  char *proto The name of the protocol directory to purge.

  char *host The name of the host directory to purge.

  int def_age The default age to use for this host.
  ++++++++++++++++++++++++++++++++++++++*/

static long PurgeFiles(char *proto,char *host,int def_age)
{
 long blocks_left=-1;
 DIR *dir;
 struct dirent* ent;

 /* Open the spool subdirectory. */

 if(chdir(host))
   {PrintMessage(Warning,"Cannot change to directory '%s/%s' [%!s]; not purged.",proto,host);return(1);}

 dir=opendir(".");
 if(!dir)
   {PrintMessage(Warning,"Cannot open directory '%s/%s' [%!s]; not purged.",proto,host);fchdir(fSpoolDir);chdir(proto);return(1);}

 ent=readdir(dir);
 if(!ent)
   {PrintMessage(Warning,"Cannot read directory '%s/%s' [%!s]; not purged.",proto,host);closedir(dir);fchdir(fSpoolDir);chdir(proto);return(1);}

 /* Check all of the files for age, and delete as needed. */

 do
   {
    struct stat buf,buf2;
    long partner_size=0;     /* size of the other file of the U/D pair, 0 if there is none. */
    int age=purge_default_age;
    time_t t=now;

    /* The order that readdir() returns entries is not specified so '.' and '..'
       must be skipped by name and not by position. */

    if(!strcmp(ent->d_name,".") || !strcmp(ent->d_name,".."))
       continue;

    /* A failure here is normal; the partner of a deleted file has already gone. */

    if(stat(ent->d_name,&buf))
       continue;

    if(buf.st_mtime>now || buf.st_atime>now)
      {
       PrintMessage(Inform,"Cached file '%s/%s/%s' has a future timestamp; changing timestamp.",proto,host,ent->d_name);
#if DO_DELETE
       utime(ent->d_name,NULL);
#else
       PrintMessage(Debug,"utime(%s/%s/%s).",proto,host,ent->d_name);
#endif

       /* Use the corrected time below as well; a timestamp later than 'now' would
          give a negative age in days (an index before the start of blocks_by_age)
          and would stop a file with an age of 0 from being deleted. */

       if(buf.st_mtime>now)
          buf.st_mtime=now;
       if(buf.st_atime>now)
          buf.st_atime=now;
      }

    if(*ent->d_name=='U' || *ent->d_name=='D')
      {
       int s;

       /* Flip the first character between 'U' and 'D' to look for the partner file. */

       *ent->d_name^='U'^'D';
       s=stat(ent->d_name,&buf2);
       *ent->d_name^='U'^'D';

       if(s)
         {
          PrintMessage(Inform,"Cached file '%s/%s/%s' is not complete (U* and D* files); deleting it.",proto,host,ent->d_name);
          age=0;
         }
       else if(*ent->d_name=='U')
          continue;             /* the pair is handled when the 'D' file is seen. */
       else
         {
          partner_size=buf2.st_size;

          if(purge_use_url)
            {
             char *url=FileNameToURL(ent->d_name);
             if(url)
               {
                URL *Url=SplitURL(url);
                age=what_purge_age_url(Url);
                FreeURL(Url);
                free(url);
               }
             else
                age=0;
            }
          else
             age=def_age;
         }

       if(purge_use_mtime)
          t=buf.st_mtime;
       else
          t=buf.st_atime;
      }
    else
      {
       PrintMessage(Inform,"Cached file '%s/%s/%s' is not valid (U* or D* file); deleting it.",proto,host,ent->d_name);
       age=0;
      }

    if(pass==2 && age>0)
       age=(int)(age*age_scale+0.5);

    if(age==-1 || t>(now-age*(24*3600)))
      {
       /* Keep the file; count one block more than st_size/blocksize for each of the pair. */

       long size=2+(buf.st_size+partner_size)/blocksize;

       if(blocks_left==-1)
          blocks_left=0;
       blocks_left+=size;

       if(age>0)
         {
          int days=(now-t)/(24*3600);

          days=days*purge_default_age/age; /* scale the age to fit into 0 -> DefaultPurgeAge */

          if(days>purge_default_age)
             days=purge_default_age;

          /* A negative default age leaves no per-day slots at all; count the
             blocks with the unpurged ones rather than write outside the array. */

          if(days<0)
             days=purge_default_age+1;

          blocks_by_age[days]+=size;
         }
       else
          blocks_by_age[purge_default_age+1]+=size;
      }
    else
      {
#if DO_DELETE
       if(unlink(ent->d_name))
          PrintMessage(Warning,"Cannot unlink file '%s/%s/%s' [%!s].",proto,host,ent->d_name);
#else
       PrintMessage(Debug,"unlink(%s/%s/%s).",proto,host,ent->d_name);
#endif

       /* Delete the partner file of the pair as well. */

       if(*ent->d_name=='U' || *ent->d_name=='D')
         {
          *ent->d_name^='U'^'D';

#if DO_DELETE
          if(unlink(ent->d_name))
             PrintMessage(Warning,"Cannot unlink file(2) '%s/%s/%s' [%!s].",proto,host,ent->d_name);
#else
          PrintMessage(Debug,"unlink(%s/%s/%s).",proto,host,ent->d_name);
#endif
         }
      }
   }
 while((ent=readdir(dir)));

 closedir(dir);
 fchdir(fSpoolDir);
 chdir(proto);

 return(blocks_left);
}


/*++++++++++++++++++++++++++++++++++++++
  Determine the age to use when purging for a specified protocol and host.

  The age is looked up for the root path ("/") of the host.  It is forced to 0
  if purging of DontGet URLs is enabled and the URL matches DontGet, or if
  purging of DontCache URLs is enabled and the URL matches DontCache or the
  host is a local network host.

  int what_purge_age Return the age in days.

  char *proto The protocol to use.

  char *host The host to use.
  ++++++++++++++++++++++++++++++++++++++*/

static int what_purge_age(char *proto,char *host)
{
 URL Url;
 int days;

 /* Only the fields that are needed for the lookup are filled in. */

 Url.args=NULL;
 Url.path="/";
 Url.host=host;
 Url.proto=proto;

 days=ConfigIntegerURL(PurgeAges,&Url);

 if(ConfigBoolean(PurgeDontGet))
    if(ConfigBooleanMatchURL(DontGet,&Url))
       days=0;

 if(ConfigBoolean(PurgeDontCache))
   {
    if(ConfigBooleanMatchURL(DontCache,&Url))
       days=0;
    else if(IsLocalNetHost(host))
       days=0;
   }

 return days;
}


/*++++++++++++++++++++++++++++++++++++++
  Determine the age to use when purging for a specified URL.

  The configured age is forced to 0 if purging of DontGet URLs is enabled and
  the URL matches DontGet, or if purging of DontCache URLs is enabled and the
  URL matches DontCache or its host is a local network host.

  int what_purge_age_url Return the age in days.

  URL *Url The URL that is to be purged.
  ++++++++++++++++++++++++++++++++++++++*/

static int what_purge_age_url(URL *Url)
{
 int days=ConfigIntegerURL(PurgeAges,Url);

 if(ConfigBoolean(PurgeDontGet))
    if(ConfigBooleanMatchURL(DontGet,Url))
       days=0;

 if(ConfigBoolean(PurgeDontCache))
   {
    if(ConfigBooleanMatchURL(DontCache,Url))
       days=0;
    else if(IsLocalNetHost(Url->host))
       days=0;
   }

 return days;
}
