#if defined(HAVE_CONFIG_H)
#include "config.h"
#endif

#include <iostream>
#include <stdlib.h>
#include <time.h>

extern "C" {
#undef VERSION
#undef PACKAGE
#include "wwwconf.h"
#include "WWWLib.h"
#include "HTProfil.h"
#include "HText.h"
#include "HTMethod.h"
#include "HTHome.h"
}

#include "override.h"
#include "excpt.h"
#include "linkopt.h"
#include "olink.h"
#include "ancheck.h"
#include "progress.h"
#include "reqctx.h"
#include "result.h"
#include "pipe.h"
#include "output.h"
#include "expstat.h"
#include "redirector.h"
#include "version.h"
#include "database.h"

// 11Jul1999: egcs 1.1.1 doesn't handle these templates
// - just skip it
#if 0
#include "wall.h"
#endif

// File-scope collaborators. All four are created in Run() just before the
// start URLs are loaded and deleted (and reset to NULL) at the end of Run();
// the libwww callbacks below reach them through these pointers.
static AnchorChecker *g_AnchorChecker = NULL;       // HandleLink(): IsInteresting()
static Progress *g_Progress = NULL;                 // Register() / Load() / Finish()
static ExpectedStatus *g_ExpectedStatus = NULL;     // HandlePageEnd(): IsExpectedStatus()
static Redirector *g_Redirector = NULL;             // IsRedirectworthy() / FreeRedirectionChain()

// URI -> status as passed to HandlePageEnd(), recorded only for requests
// whose status ExpectedStatus does not accept. Run() feeds it to Result.
typedef map<string, int> TProblems;
static TProblems g_Problems;

// Gates the LOG_INFO progress messages; overwritten in Run() from the
// o_verbose option.
bool g_Verbose = true;

// Performs the complete run; called from main(), which reports whatever
// exception escapes from it.
void Run(int argc, char *argv[]);

// Applies /etc/linkcheck.conf and $HOME/.linkcheck to the options.
void OverrideOptionsFromConfigFile(CLinkOptions &options);

// Per-link worker: decides whether the link target should be checked and,
// if so, starts a request for it. Called through SafeLink().
void HandleLink(HText *text,
    int element_number,
    int attribute_number,
    HTChildAnchor *anchor,
    const BOOL *present,
    const char **value);
// libwww "after" filter registered for every status (HT_ALL): records the
// outcome of a finished request and releases it.
int HandlePageEnd(HTRequest *request,
    HTResponse *response,
    void *param,
    int status);
// libwww "after" filter registered for HT_PERM_REDIRECT on "http://*":
// follows the redirection only when the Redirector allows it.
int HandleRedirect(HTRequest *request,
    HTResponse *response,
    void *param,
    int status);

// Link callback registered with HText_registerLinkCallback(); wraps
// HandleLink() so that no exception propagates out of the callback.
void SafeLink(HText *text,
    int element_number,
    int attribute_number,
    HTChildAnchor *anchor,
    const BOOL *present,
    const char **value);

// Program entry point.
//
// Names the output channel "linkcheck", delegates all work to Run() and
// turns every exception that escapes from it into a diagnostic written to
// the default output sink.
//
// Returns 0 when Run() returns normally and -1 when any exception is caught.
int main(int argc, char *argv[])
{
    try
    {   Output::SetName("linkcheck");
        Run(argc, argv);
        return 0;
    }

    // configuration file / option processing failures
    catch (FileOpenException &e)
    {   Output::GetSink() << "can't open config file " << e.FName << '\n';
        return -1;
    }
    catch (FileParseException &e)
    {   Output::GetSink() << "can't parse config file " << e.FName <<
            ": line " << e.Line << ": " << e.Mess << '\n';
        return -1;
    }
    catch (UndefinedKeyException &e)
    {   Output::GetSink() << "undefined option " << e.Key << '\n';
        return -1;
    }
    catch (MissingValueException &e)
    {   Output::GetSink() << "option " << e.Key << " requires a value\n";
        return -1;
    }
    catch (MultipleDefinitionException &e)
    {   Output::GetSink() << "option " << e.Key << " was defined more "
            "than once\n";
        return -1;
    }
    // NOTE(review): handlers are tried in order. If any exception type
    // handled further down derives from Exception, its dedicated handler is
    // unreachable and this generic message is printed instead - confirm the
    // hierarchy in the project's exception headers.
    catch (Exception &)
    {   Output::GetSink() << "error processing options\n";
        return -1;
    }

    // failures while loading URLs or delivering results
    catch (InvalidUriException &e)
    {   Output::GetSink() << "invalid uri " << e.Uri << '\n';
        return -1;
    }
    catch (LoadFailureException &e)
    {   Output::GetSink() << "can't load " << e.Uri << '\n';
        return -1;
    }
    catch (OpenPipeException &e)
    {   Output::GetSink() << "can't execute " << e.Command << '\n';
        return -1;
    }
    catch (UrlStatusFormatException &e)
    {   // the leading space keeps the offending text apart from the message
        Output::GetSink() << e.UrlRange << " is not a valid description of "
            "URL with expected status" << '\n' <<
            "\tthe description must have a form of \"url=status\",\n"
            "\twhere status is a number from 100 to 999 (or a status \n"
            "\tcode class)\n";
        return -1;
    }

#if defined(DBSUPP_MYSQL)
    catch (ConnectError &e)
    {   Output::GetSink() << "can't connect: " << e.Message << '\n';
        return -1;
    }
#endif

    // lots of types missing here
    catch (...)
    {   Output::GetSink() << "unrecognized failure\n";
        return -1;
    }
}

// Starts the check of one start URL.
//
// The URI is passed through HTParse() with PARSE_ALL and the result replaces
// the caller-supplied text. A new libwww request then gets a CRequestContext
// for that URI attached and is handed to g_Progress->Load().
//
// Throws InvalidUriException (carrying the URI as given) when HTParse()
// returns NULL.
void LoadTop(string uri)
{
    char *parsed = HTParse(uri.c_str(), "", PARSE_ALL);
    if (parsed == NULL)
        throw InvalidUriException(uri);

    // take a copy before releasing the buffer HTParse() handed back
    uri.assign(parsed);
    free(parsed);

    HTRequest *top_request = HTRequest_new();
    CRequestContext *context = new CRequestContext(uri);
    HTRequest_setContext(top_request, context);

    g_Progress->Load(uri.c_str(), top_request);
}

void Run(int argc, char *argv[])
{
    time_t start_time = time(NULL);

    CLinkOptions options;
    OverrideOptionsFromConfigFile(options);
    OverrideOptions(options, argv + 1, string(o_start_url));

    g_Verbose = options.Get_bool(o_verbose);
    string lwt = options.Get(o_libwww_trace);
    if (!lwt.empty())
	HTSetTraceMessageMask(lwt.c_str());

    HTProfile_newRobot("linkcheck", g_LinkcheckVersion);

    HTNet_addAfter(HandleRedirect, "http://*", NULL,
	HT_PERM_REDIRECT, HT_FILTER_LATE);
	// could be registered conditionally

    HTNet_addAfter(HandlePageEnd, NULL, NULL, HT_ALL, HT_FILTER_LAST);
    HTAlert_setInteractive(NO);
    HText_registerLinkCallback(SafeLink);

    string pipe = options.Get(o_message_pipe);
    if (pipe.empty())
    {   Output::GetSink(LOG_WARNING) << "missing command to "
	    "send results - specify " << o_message_pipe << " option\n";
        return;
    }

    TStringCol start = options.Get_TStringCol(o_start_url);
    if (start.empty())
    {   Output::GetSink(LOG_WARNING) << "no URLs to check - specify " <<
	    o_start_url << " option\n";
        return;
    }

    Database topology(options);

    g_AnchorChecker = new AnchorChecker(options);
    g_Progress = new Progress(topology);
    g_ExpectedStatus = new ExpectedStatus(options);
    g_Redirector = new Redirector(options);

    for (TStringCol::const_iterator i = start.begin();
	i != start.end();
	++i)
	LoadTop(*i);

    HTEventList_newLoop();

    Result result(options, start_time, time(NULL));
    for (TProblems::const_iterator i = g_Problems.begin();
	i != g_Problems.end();
	++i)
    {   result.Add(i->first,
	    topology.GetDependencies(i->first),
	    i->second);
    }

    if (g_Verbose)
	Output::GetSink(LOG_INFO) << "found " << 
	    static_cast<int>(g_Problems.size()) <<
	    " problematic URLs\n";

    result.Send();

    delete g_Redirector;
    g_Redirector = NULL;
    delete g_ExpectedStatus;
    g_ExpectedStatus = NULL;
    delete g_Progress;
    g_Progress = NULL;
    delete g_AnchorChecker;
    g_AnchorChecker = NULL;

    HTProfile_delete();
}

// Handles one link reported by the libwww parser (called via SafeLink()).
//
// Determines the address of the anchor the link was found in and the
// normalized address of the link's destination, then:
//   - skips the link when g_AnchorChecker does not consider it interesting;
//     IsInteresting() also supplies the HTTP method to use,
//   - for METHOD_HEAD cuts the URL at the first '?',
//   - skips the link when g_Progress->Register() reports it as already
//     handled,
//   - otherwise creates a request with a CRequestContext for the URL and
//     starts loading the destination's parent anchor.
//
// Returns silently when the destination has no address or HTParse() fails.
// text, element_number, attribute_number, present and value are unused.
void HandleLink(HText *text,
    int element_number,
    int attribute_number,
    HTChildAnchor *anchor,
    const BOOL *present,
    const char **value)
{
    char *parent_ptr = HTAnchor_address((HTAnchor *)anchor);
    assert(parent_ptr != NULL);
    string parent(parent_ptr);
    // HTAnchor_address() hands back an allocated copy; release it on every
    // path, the same way the destination address is released below
    free(parent_ptr);

    HTAnchor *dest = HTAnchor_followMainLink((HTAnchor *)anchor);
    HTParentAnchor *dest_parent = HTAnchor_parent(dest);
    char *raw_ptr = HTAnchor_address((HTAnchor *)dest_parent);
    if (!raw_ptr)
        return;

    string raw_url = raw_ptr;
    free(raw_ptr);

    char *nu = HTParse(raw_url.c_str(), "", PARSE_ALL);
    if (nu == NULL)
        return;

    string url(nu);
    free(nu);

    HTMethod method;
    if (!g_AnchorChecker->IsInteresting(url.c_str(), method))
    {   if (g_Verbose)
            Output::GetSink(LOG_INFO) << "skipping " << url <<
                " (uninteresting scheme)\n";
        return;
    }

    if (method == METHOD_HEAD)
    {   // drop the query part; what about ';'?
        string::size_type query = url.find('?');
        if (query != string::npos)
            url.erase(query);
    }

    if (!g_Progress->Register(url, parent))
    {   if (g_Verbose)
            Output::GetSink(LOG_INFO) << "skipping " << url <<
                " (already handled)\n";
        return;
    }

    if (g_Verbose)
        Output::GetSink(LOG_INFO) << "checking " << url <<
            " with " << HTMethod_name(method) << '\n';

    HTRequest *request = HTRequest_new();
    HTRequest_setContext(request, new CRequestContext(url));
    HTRequest_setMethod(request, method);

    g_Progress->Load((HTAnchor *)dest_parent, request);
}

// libwww "after" filter run when a request has finished.
//
// Logs the status (when verbose), lists the request's error stack for status
// -902, releases the redirection chain kept for the URI, records the URI in
// g_Problems when its status is not an expected one, and finally deletes the
// request together with its CRequestContext and notifies g_Progress.
//
// response and param are unused. Always returns 0.
int HandlePageEnd(HTRequest *request,
    HTResponse *response,
    void *param,
    int status)
{
    CRequestContext *context =
        static_cast<CRequestContext *>(HTRequest_context(request));

    if (g_Verbose)
    {   Output::GetSink(LOG_INFO) << "checked " << context->Uri <<
            " with status " << status << '\n';
    }

    // NOTE(review): -902 is presumably libwww's HT_INTERRUPTED - confirm
    // against the libwww headers before replacing the literal.
    if (status == -902)
    {   HTList *cursor = HTRequest_error(request);
        for (;;)
        {   HTError *entry = (HTError *)HTList_nextObject(cursor);
            if (entry == NULL)
                break;
            Output::GetSink(LOG_INFO) << "\terror " <<
                HTError_index(entry) << " in " <<
                HTError_location(entry) << '\n';
        }
    }

    g_Redirector->FreeRedirectionChain(context->Uri);

    // expected statuses are compared without sign; the problem list keeps
    // the status exactly as it was passed in
    int magnitude = status;
    if (magnitude < 0)
        magnitude = -magnitude;
    if (!g_ExpectedStatus->IsExpectedStatus(context->Uri, magnitude))
        g_Problems[context->Uri] = status;

    HTRequest_delete(request);
    delete context;
    g_Progress->Finish();
    return 0;
}

// Applies the configuration files to the options: first the system-wide
// /etc/linkcheck.conf, then ".linkcheck" in the directory named by the HOME
// environment variable. The per-user file is skipped when HOME is unset or
// empty.
void OverrideOptionsFromConfigFile(CLinkOptions &options)
{
    OverrideCond(options, "/etc/linkcheck.conf");

    const char *home = getenv("HOME");
    if (home == NULL || *home == '\0')
        return;

    string user_conf(home);
    // make sure exactly one '/' separates the directory from the file name
    if (user_conf[user_conf.length() - 1] != '/')
        user_conf += '/';
    user_conf += ".linkcheck";

    OverrideCond(options, user_conf.c_str());
}

// libwww "after" filter for redirections.
//
// Delegates to libwww's HTRedirectFilter() when g_Redirector considers the
// request's URI worth redirecting; otherwise returns HT_OK without following
// the redirection.
int HandleRedirect(HTRequest *request,
    HTResponse *response,
    void *param,
    int status)
{
    CRequestContext *context =
        static_cast<CRequestContext *>(HTRequest_context(request));

    if (g_Verbose)
    {   Output::GetSink(LOG_INFO) << "handling redirection for "
            << context->Uri << '\n';
    }

    if (g_Redirector->IsRedirectworthy(context->Uri))
        return HTRedirectFilter(request, response, param, status);

    return HT_OK;
}

// Exception barrier around HandleLink().
//
// Forwards all arguments unchanged. A QueryError is reported with its query,
// message and error number; any other exception is reported generically.
// Nothing is rethrown.
void SafeLink(HText *text,
    int element_number,
    int attribute_number,
    HTChildAnchor *anchor,
    const BOOL *present,
    const char **value)
{
    try
    {
        HandleLink(text,
            element_number,
            attribute_number,
            anchor,
            present,
            value);
    }
    catch (QueryError &e)
    {
        Output::GetSink()
            << "query \"" << e.Query
            << "\" failed with " << e.Message
            << " (" << e.ErrorNumber << ")\n";
    }
    catch (...)
    {
        Output::GetSink() << " unknown error in link handler\n";
    }
}

