/*
 *   Copyright (C) 1997, 1998
 *   	Free Software Foundation, Inc.
 *
 *   This program is free software; you can redistribute it and/or modify it
 *   under the terms of the GNU General Public License as published by the
 *   Free Software Foundation; either version 2, or (at your option) any
 *   later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

#include <salloc.h>
#include <logfile.h>

#include <getopttools.h>
#include <crawl.h>
#include <webbase.h>
#include <webbase_create.h>
#include <WebbaseDl.h>

/*
 * Capacity of the long_options table built in init(): the array is declared
 * with MAX_OPTIONS + 1 entries, and the same value is stored in the has_arg
 * field of the table's terminating entry.
 */
#define MAX_OPTIONS 100
/*
 * Value stored in the val field of the terminating entry of long_options.
 * NOTE(review): presumably interpreted by getopt_merge()/crawl_options() to
 * identify the application-level option table -- confirm in getopttools.
 */
#define APPLICATION_OPTIONS		0x8000000

/*
 * Command line state of the crawler program.  Filled by init() and read by
 * main() to decide which action to run; the strings are strdup() copies
 * released by finish().
 */
typedef struct crawler_params {
  /* Crawl context returned by crawl_alloc(); passed to every crawl_ and hp_ call. */
  crawl_params_t* crawl;
  /* Set by -home_pages: main() calls hp_load_1(). */
  int home_pages;
  /* Set by -externals: main() calls hp_print_externals(). */
  int externals;
  /* Set by -unload and -unload_keep_start: each URL argument goes to hp_unload(). */
  int unload;
  /* Set by -schema: main() calls webbase_schema(). */
  int schema;
  /* Set by -create: main() calls webbase_create() with the remaining arguments. */
  int create;
  /* Set by -unload_keep_start; passed as the last argument of hp_unload(). */
  int unload_keep_start;
  /* Set by -rehook: main() calls crawl_rehook(). */
  int rehook;
  /* 1 for -rebuild, 2 for -rebuild_resume; passed as is to crawl_rebuild(). */
  int rebuild;
  /* Argument of -urls (heap copy); passed to crawl_urls(). */
  char* urls;
  /* Set by -touch: each URL argument goes to crawl_touch(). */
  int touch;
  /* Set by -remove_unloaded: hp_remove_unloaded() runs on each URL argument first. */
  int remove_unloaded;
  /* Argument of -where_start (heap copy); passed to hp_load_1() and hp_print_externals(). */
  char* where_start;
  /* Argument of -log (heap copy); passed to logfile() at the end of init(). */
  char* log;
} crawler_params_t;

/* Program-wide option state; static storage, so every field starts at zero. */
static crawler_params_t params;

/* Set to 1 by the -verbose option; not read anywhere else in this file. */
static int verbose = 0;

/* Parse the command line into params and allocate the crawl context. */
static void init(int argc, char** argv);
/* Release what params owns and exit the process. */
static void finish();

/*
 * Program entry point: parse the command line with init(), then run exactly
 * one action.
 *
 * The first flag set among home_pages, create, schema, rehook, rebuild,
 * urls and externals (tested in that order) selects the action.  When none
 * of them is set, every remaining command line argument (from optind on) is
 * handled as a URL: hp_remove_unloaded() is applied first if requested,
 * then the URL is touched, unloaded or loaded depending on the touch and
 * unload flags.
 *
 * The function does not return normally: finish() releases the resources
 * and exits with status 0.
 */
int main(int argc, char** argv)
{
  init(argc, argv);

  crawl_params_t* crawl = params.crawl;

  if(params.home_pages) {
    hp_load_1(crawl, params.where_start);
  } else if(params.create) {
    /* Arguments left after the options, if any, are handed over as schema. */
    const char** schema = 0;
    int schema_length = 0;
    if(optind < argc) {
      schema = (const char**)(argv + optind);
      schema_length = argc - optind;
    }
    webbase_create(&crawl->base->mysql, schema, schema_length);
  } else if(params.schema) {
    webbase_schema();
  } else if(params.rehook) {
    crawl_rehook(crawl);
  } else if(params.rebuild) {
    crawl_rebuild(crawl, params.rebuild);
  } else if(params.urls) {
    crawl_urls(crawl, params.urls);
  } else if(params.externals) {
    hp_print_externals(crawl, params.where_start);
  } else {
    int i;
    /*
     * Scratch URL object, refilled for each argument; 16 is the length of
     * the placeholder string.  It is not released explicitly: the process
     * exits through finish() right after the loop.
     */
    uri_t* url_object = uri_alloc("http://fake.net/", 16);
    for(i = optind; i < argc; i++) {
      char* url;
      if(uri_realloc(url_object, argv[i], strlen(argv[i])) != URI_CANNONICAL) {
        fprintf(stderr, "crawler: cannnot cannonicalize %s, ignored\n", argv[i]);
        /*
         * Really ignore the argument, as the message says: going on would
         * act on whatever url_object holds after the failed call instead
         * of the URL the user gave.
         */
        continue;
      }
      url = uri_uri(url_object);
      if(params.remove_unloaded) {
        hp_remove_unloaded(crawl, url);
      }
      if(params.touch) {
        crawl_touch(crawl, url);
      } else if(params.unload) {
        hp_unload(crawl, url, params.unload_keep_start);
      } else {
        hp_load_in_core(crawl, url);
      }
    }
  }
  finish();
  /* Not reached: finish() calls exit(). */
  return 0;
}

/*
 * Tear down the program state and terminate: the crawl context is released
 * with crawl_free(), the option strings copied by init() are freed, and the
 * process exits with status 0.  Never returns.
 */
void finish()
{
  crawl_free(params.crawl);

  /* free() accepts a null pointer, so options that were never given need no test. */
  free(params.urls);
  free(params.where_start);
  free(params.log);

  exit(0);
}

/*
 * Declaration, with C linkage, of a function returning void* and defined
 * outside this file.  Nothing in the visible part of this file calls it.
 * NOTE(review): presumably the entry point of the mifluz hooks module --
 * confirm whether this declaration is still needed.
 */
extern "C" {
void* hooksmifluz_init();
}

/*
 * Store a private heap copy of value in *slot, releasing the copy that an
 * earlier occurrence of the same option may have left there.  Exits with
 * status 1 if the copy cannot be allocated, so that an option is never
 * silently dropped.
 */
static void set_string_param(char** slot, const char* value)
{
  char* copy = strdup(value);
  if(copy == 0) {
    fprintf(stderr, "crawler: out of memory\n");
    exit(1);
  }
  free(*slot);
  *slot = copy;
}

/*
 * Parse the command line and prepare the global params.
 *
 * Steps, in order:
 *  - WebbaseDl gets to look at the arguments first;
 *  - the options of this program are merged with those returned by
 *    crawl_options() and scanned with getopt_long_only();
 *  - the crawl context is created with crawl_alloc(), which receives the
 *    same argc/argv and the merged option table;
 *  - if -log was given, its argument is passed to logfile().
 *
 * -help prints the option table with getopt_dump() and exits with status 0.
 */
static void init(int argc, char** argv)
{
  WebbaseDl::Instance()->Parse(argc, argv);
  
  static struct option long_options[MAX_OPTIONS + 1] =
  {
    /* Entries with a non null flag pointer are stored by getopt itself. */
    {"verbose", 0, &verbose, 1},
    /* Entries with a null flag pointer are decoded by name in the loop below. */
    {"log", 1, 0, 0},
    {"help", 0, 0, 0},
    {"where_start", 1, 0, 0},
    {"urls", 1, 0, 0},
    {"unload", 0, 0, 0},
    {"unload_keep_start", 0, 0, 0},
    {"create", 0, &params.create, 1},
    {"schema", 0, &params.schema, 1},
    {"rehook", 0, &params.rehook, 1},
    {"rebuild", 0, &params.rebuild, 1},
    {"rebuild_resume", 0, &params.rebuild, 2},
    {"externals", 0, &params.externals, 1},
    {"home_pages", 0, &params.home_pages, 1},
    {"remove_unloaded", 0, &params.remove_unloaded, 1},
    {"touch", 0, &params.touch, 1},
    /*
     * Terminating entry (null name).
     * NOTE(review): the has_arg and val fields presumably carry the table
     * capacity and a tag for getopt_merge() -- confirm in getopttools.
     */
    {0, MAX_OPTIONS, 0, APPLICATION_OPTIONS}
  };

  getopt_merge(long_options, crawl_options(long_options));

  /* No diagnostics from getopt itself, and restart the scan from the beginning. */
  opterr = 0;
  optind = 0;
  for(;;) {
    /* getopt_long_only stores the index of the matched entry here. */
    int option_index = 0;
    int c = getopt_long_only(argc, argv, "-", long_options, &option_index);

    /* Detect the end of the options. */
    if(c == -1)
      break;

    /*
     * Only options reported with code 0 and a null flag pointer need work
     * here: flag options were already stored by getopt, and every other
     * return code is deliberately skipped.
     */
    if(c != 0 || long_options[option_index].flag != 0)
      continue;

    const char* name = long_options[option_index].name;

    if(!strcmp(name, "log")) {
      set_string_param(&params.log, optarg);
    } else if(!strcmp(name, "help")) {
      getopt_dump(long_options);
      exit(0);
    } else if(!strcmp(name, "unload")) {
      params.unload = 1;
    } else if(!strcmp(name, "unload_keep_start")) {
      /* Keeping the start page only makes sense when unloading. */
      params.unload_keep_start = 1;
      params.unload = 1;
    } else if(!strcmp(name, "where_start")) {
      set_string_param(&params.where_start, optarg);
    } else if(!strcmp(name, "urls")) {
      set_string_param(&params.urls, optarg);
    }
    /* Any other name comes from the merged table and is not handled here. */
  }

  params.crawl = crawl_alloc(argc, argv, long_options);

  if(params.log) logfile(params.log);
}
