/*
 * This file is part of Crossbow.
 *
 * Crossbow is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation,
 * either version 3 of the License, or (at your option) any
 * later version.
 *
 * Crossbow is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with Crossbow.  If not, see
 * <https://www.gnu.org/licenses/>.
 */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sysexits.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <mrss.h>
#include <uthash.h>

#include "config.h"
#include "download.h"
#include "filemap.h"
#include "logging.h"
#include "outfmt.h"
#include "persist_dir.h"
#include "persist_file.h"
#include "persist_items.h"
#include "placeholders.h"
#include "rc.h"
#include "text.h"
#include "util.h"

/* Command line options, as collected by read_opts(). */
typedef struct
{
    /* Feed identifier given with -i, or as first positional argument.
     * NULL when none was given, in which case every feed is enabled. */
    const char *uid;
    /* -d: process the feeds, but do not write the persist_file back. */
    bool dry_mark : 1;
    /* -D: forwarded as dry_run to ofmt_evaluate(). */
    bool dry_exec : 1;
    /* -c: record unseen items without running the handler on them. */
    bool catch_up : 1;
} opts_t;

/* Per-feed state.  One object exists for each feed of the configuration;
 * objects are indexed by name in a feed_map_t. */
typedef struct
{
    /* Feed name, borrowed from the configuration (hash key). */
    const char *name;
    /* Configuration entry of this feed. */
    const rc_feed_t *config;
    /* Persisted state of the feed.  NULL until assigned by
     * sync_persist_dir() or feed_info_set_persist(). */
    persist_file_t *persist_file;
    /* True if persist_file was created by feed_info_set_persist(), and must
     * therefore be deleted by feed_map_free(). */
    bool own_persist;
    /* True once load_outfmt() compiled the output format of this feed. */
    bool ofmt_loaded;
    /* False if the user selected another feed on the command line. */
    bool enabled;
    /* Back-pointer to the program-wide context. */
    struct ctx *global_ctx;

    /* uthash bookkeeping */
    UT_hash_handle hh;
} feed_info_t;

/* Collection of feed_info_t objects, see feed_map_init(). */
typedef struct
{
    /* Backing array holding the feed_info_t objects (heap allocated). */
    feed_info_t *slots;
    /* uthash head: indexes the objects in `slots` by feed name. */
    feed_info_t *map;
    /* Number of elements allocated in `slots`. */
    unsigned size;
} feed_map_t;

/* Program-wide context, assembled in main() and handed over to run(). */
typedef struct ctx
{
    /* Loaded configuration. */
    const rc_t *rc;
    /* Downloader on which the feed retrievals are scheduled. */
    download_t *dload;
    /* Output format object, shared among all feeds. */
    ofmt_t *ofmt;
    /* Command line options. */
    const opts_t opts;
    /* Handle on the directory holding the persisted feed state. */
    persist_dir_t *persist_dir;
    /* Count of non-critical failures (feed-level and item-level).  If it is
     * non-zero at the end of run(), the execution is reported as failed. */
    unsigned failures;
} ctx_t;

/* Hash map entry for an item that was loaded from a persist_file. */
typedef struct
{
    /* By-value copy of the persisted item; its data/len pair is the hash
     * key (see items_map_init). */
    persist_item_t persist_item;
    /* uthash bookkeeping */
    UT_hash_handle hh;
} feed_item_t;

/* Collection of feed_item_t objects, see items_map_init(). */
typedef struct
{
    /* Backing array holding the feed_item_t objects (heap allocated). */
    feed_item_t *slots;
    /* uthash head: indexes the objects in `slots` by item identifier. */
    feed_item_t *map;
    /* Number of elements allocated in `slots`. */
    unsigned size;
} items_map_t;

/* Outcome of a processing step.  The failure values tell how far the
 * failure propagates. */
typedef enum
{
    /* Everything went fine. */
    ps_success,
    /* on_downloaded() returns -1 for this, interrupting all the downloads. */
    ps_fail_critical,
    /* The current feed is given up; counted in ctx_t.failures. */
    ps_fail_feed,
    /* The current item is given up, the next item of the same feed is still
     * processed; counted in ctx_t.failures. */
    ps_fail_item,
} process_status_t;

/* Release what feed_map holds: the persist_file objects flagged as
 * own_persist, the hash index, and finally the backing array.
 *
 * persist_file objects that are not flagged as own_persist are not touched
 * here. */
static void feed_map_free(feed_map_t *feed_map)
{
    feed_info_t *cur, *next;

    HASH_ITER(hh, feed_map->map, cur, next) {
        if (cur->own_persist) {
            persist_file_del(cur->persist_file);
        }

        HASH_DELETE(hh, feed_map->map, cur);
    }

    free(feed_map->slots);
}

/* Build the feed map: one feed_info_t for each feed of the configuration,
 * all stored in a single array and indexed by feed name.
 *
 * If a feed identifier was given on the command line, only the feed having
 * that name is flagged as enabled; otherwise all the feeds are.
 *
 * Returns 0 on success (an empty configuration is a success too), or -1 on
 * allocation failure, on duplicated feed names, and when no feed ends up
 * being enabled.  Upon failure there is nothing left for the caller to
 * release. */
static int feed_map_init(feed_map_t *feed_map, ctx_t *ctx)
{
    const char *wanted = ctx->opts.uid;
    const unsigned n_feeds = rc_n_feeds(ctx->rc);
    bool any_enabled = false;
    feed_info_t *slots;

    if (n_feeds == 0) {
        notice("no feeds defined");
        *feed_map = (feed_map_t){};
        return 0;
    }

    slots = reallocarray(NULL, n_feeds, sizeof(*slots));
    if (slots == NULL) {
        warn("reallocarray");
        return -1;
    }

    *feed_map = (feed_map_t){
        .slots = slots,
        .size = n_feeds,
    };

    for (unsigned i = 0; i < n_feeds; i++) {
        const rc_feed_t *rc_feed = rc_get_feed(ctx->rc, i);
        const size_t namelen = strlen(rc_feed->name);
        feed_info_t *feed_info;
        bool enabled = true;

        HASH_FIND(hh, feed_map->map, rc_feed->name, namelen, feed_info);
        if (feed_info != NULL) {
            warnx("Duplicated feed name in configuration: %s", rc_feed->name);
            goto fail;
        }

        /* Without a selection every feed is enabled, otherwise only the
         * one matching the requested name. */
        if (wanted != NULL)
            enabled = strcmp(rc_feed->name, wanted) == 0;

        /* The i-th configured feed lives in the i-th slot. */
        feed_info = &slots[i];
        *feed_info = (feed_info_t){
            .name = rc_feed->name,
            .config = rc_feed,
            .enabled = enabled,
            .global_ctx = ctx,
        };

        if (enabled)
            any_enabled = true;

        HASH_ADD_KEYPTR(hh, feed_map->map, feed_info->name, namelen, feed_info);
    }

    if (any_enabled)
        return 0;

    warnx("no feed named %s", wanted);

fail:
    feed_map_free(feed_map);
    return -1;
}

/* Create and load a persist_file for a feed that did not get one from the
 * persist_dir.
 *
 * On success the persist_file is attached to feed_info and flagged as
 * own_persist: it is not owned by the persist_dir object, so it is up to us
 * to delete it later.
 *
 * Returns 0 on success, -1 if the persist_file cannot be created, loaded,
 * or if it is not writeable. */
static int feed_info_set_persist(feed_info_t *feed_info)
{
    persist_file_t *pf;

    pf = persist_file_new(feed_info->global_ctx->persist_dir, feed_info->name);
    if (pf == NULL)
        return -1;

    if (persist_file_load(pf) != 0 || !persist_file_is_writeable(pf)) {
        persist_file_del(pf);
        return -1;
    }

    feed_info->persist_file = pf;
    feed_info->own_persist = true;
    return 0;
}

/* Print the command line synopsis on stderr.  progname is the name under
 * which the program was invoked. */
static void usage(const char *progname)
{
    static const char synopsis[] = "Usage: %s [-cDdhqVv] [[-i] identifier]\n";

    fprintf(stderr, synopsis, progname);
}

/* Parse the command line into an opts_t.
 *
 * -h and -V print the usage / the package string and terminate the program
 * successfully; an unknown option prints the usage and terminates with
 * failure.  -q and -v adjust the global verbosity level.
 *
 * The feed identifier comes from -i or, if -i was not given, from the first
 * positional argument.  Any further argument is reported and ignored. */
static opts_t read_opts(int argc, char **argv)
{
    opts_t parsed = {};
    int flag;

    while ((flag = getopt(argc, argv, "cDdhi:qVv")) != -1) {
        switch (flag) {
        case 'c':
            parsed.catch_up = true;
            break;

        case 'D':
            parsed.dry_exec = true;
            break;

        case 'd':
            parsed.dry_mark = true;
            break;

        case 'h':
            usage(argv[0]);
            exit(EXIT_SUCCESS);

        case 'i':
            parsed.uid = optarg;
            break;

        case 'q':
            g_verbosity_level = 0;
            break;

        case 'V':
            puts(PACKAGE_STRING);
            exit(EXIT_SUCCESS);

        case 'v':
            g_verbosity_level++;
            break;

        default:
            usage(argv[0]);
            exit(EXIT_FAILURE);
        }
    }

    /* Positional form of the identifier, only honoured without -i. */
    if (parsed.uid == NULL && optind < argc) {
        parsed.uid = argv[optind];
        optind++;
    }

    int leftover = optind;
    while (leftover < argc) {
        warnx("Ignoring argument: %s", argv[leftover]);
        leftover++;
    }

    return parsed;
}

/* Create the output format object and register the placeholders on it.
 * Returns NULL if either step fails. */
static ofmt_t * setup_ofmt(void)
{
    ofmt_t *ofmt = ofmt_new();

    if (ofmt != NULL && placeholders_setup(ofmt) != 0) {
        ofmt_del(ofmt);
        ofmt = NULL;
    }

    return ofmt;
}

/* Open the directory where the feed state is persisted, creating it first
 * if needed, and wrap it into a persist_dir_t object.
 *
 * Returns the persist_dir_t object, or NULL if the directory cannot be
 * opened or the object cannot be created. */
static persist_dir_t *setup_persist(const rc_t *rc)
{
    const char *persist_dir_path;
    int basedir;
    persist_dir_t *persist_dir;

    persist_dir_path = rc_get_global(rc)->persist_dir;

    /* Best effort: a failure here is only reported.  If the directory is
     * really unusable, the open(2) below fails and we give up there. */
    if (mkdir(persist_dir_path, 0755) == -1 && errno != EEXIST)
        warn("mkdir(%s, 0755)", persist_dir_path);

    basedir = open(persist_dir_path, O_DIRECTORY | O_RDONLY);
    if (basedir == -1) {
        warn("open(%s, O_DIRECTORY | O_RDONLY)", persist_dir_path);
        return NULL;
    }

    /* The descriptor is closed here only if persist_dir_new() failed. */
    persist_dir = persist_dir_new(basedir);
    if (persist_dir == NULL && close(basedir) == -1)
        warn("close");
    return persist_dir;
}

/* Reconcile the persisted feeds found on the filesystem with the feed map.
 *
 * For each persist_file yielded by the persist_dir:
 * - if the corresponding feed is configured but not enabled, leave it alone;
 * - if it cannot be loaded, skip it;
 * - if the feed map does not contain it (that is, the feed was removed from
 *   the configuration file), unlink it;
 * - otherwise attach it to the feed_info_t of the feed, provided that it is
 *   writeable.
 *
 * Always returns 0. */
static int sync_persist_dir(const ctx_t *ctx, feed_map_t *feed_map)
{
    persist_file_t *pf;
    void *iter_state = NULL;

    debug("synchronize persist_dir");
    while ((pf = persist_dir_iter(ctx->persist_dir, &iter_state)) != NULL) {
        const char *pfname = persist_file_name(pf);
        unsigned pfnamelen = strlen(pfname);
        feed_info_t *feed_info;

        HASH_FIND(hh, feed_map->map, pfname, pfnamelen, feed_info);
        debug(" persist_file %s, configured %s, enabled %s",
            pfname,
            feed_info ? "yes" : "no",
            feed_info ? (feed_info->enabled ? "yes" : "no") : "n/a"
        );

        if (feed_info != NULL && !feed_info->enabled)
            continue;

        if (persist_file_load(pf) != 0)
            continue;

        if (feed_info == NULL) {
            debug(" unlinking persist_file %s: not in configuration", pfname);
            if (persist_file_unlink(pf) != 0)
                warnx("Failed to remove persist_file %s", pfname);
            continue;
        }

        if (persist_file_is_writeable(pf)) {
            feed_info->persist_file = pf;
        } else {
            warnx("skipping feed %s: cannot handle persist_file %s",
                feed_info->name,
                pfname
            );
            /* NOTE(review): this only drops the feed from the hash index.
             * A caller walking feed_map->slots still sees it as enabled and
             * without persist_file -- confirm this is intended. */
            HASH_DELETE(hh, feed_map->map, feed_info);
        }
    }

    return 0;
}

/* Index the items of p_items into imap, keyed by item identifier.
 *
 * The map stores by-value copies of the persist_item_t objects: the bytes
 * of each identifier are not duplicated, so p_items must outlive the map.
 *
 * Returns 0 on success, -1 on allocation failure or if p_items contains a
 * duplicated identifier.  In the latter case imap is left partially
 * populated, and the caller is still expected to pass it to
 * items_map_free(). */
static int items_map_init(items_map_t *imap, const persist_items_t *p_items)
{
    unsigned n;
    feed_item_t *slots;

    n = persist_items_size(p_items);
    if (n == 0) {
        *imap = (items_map_t){};
        return 0;
    }

    slots = reallocarray(NULL, n, sizeof(*slots));
    if (!slots) {
        warn("reallocarray");
        return -1;
    }

    *imap = (items_map_t){
        .slots = slots,
        .size = n,
    };

    debug(" loading from persist_file %u items:", n);
    for (unsigned i = 0; i < n; i ++) {
        const persist_item_t *p_item;
        feed_item_t *f_item;

        p_item = persist_items_get(p_items, i);

        HASH_FIND(hh, imap->map, p_item->data, p_item->len, f_item);
        if (f_item) {
            /* The persist_items implementation does not check for duplications,
             * however this should never happen. The items are in fact loaded
             * into a hash map at every execution, updated, and then stored
             * again.  Still the file might have been tampered with (why?), so
             * we should probably not crash. */

            /* The precision of "%.*s" must be an int: cast explicitly, as
             * it is done for the debug message below. */
            warnx("Duplicated feed item: %.*s",
                (int)p_item->len,
                (const char *)p_item->data
            );
            return -1;
        }

        f_item = &slots[i];
        *f_item = (feed_item_t){
            .persist_item = *p_item,
        };

        debug("   item guid \"%.*s\"",
            (int)f_item->persist_item.len,
            (char *)f_item->persist_item.data
        );
        HASH_ADD_KEYPTR(hh, imap->map,
            f_item->persist_item.data,
            f_item->persist_item.len,
            f_item
        );
    }

    return 0;
}

/* Dismantle the hash index of imap, then release its backing array.  An
 * items_map_t that was zero-initialized and never populated is handled
 * too. */
static void items_map_free(items_map_t *imap)
{
    feed_item_t *cur, *next;

    HASH_ITER(hh, imap->map, cur, next) {
        HASH_DELETE(hh, imap->map, cur);
    }

    free(imap->slots);
}

/* Parse the downloaded document, mapped in memory as fmap, with libmrss.
 *
 * On success the parsed feed is stored in *xml and ps_success is returned.
 * On failure a diagnostic mentioning the feed name and url is emitted and
 * ps_fail_feed is returned. */
static process_status_t parse_feed(const feed_info_t *feed_info,
                                   const filemap_t *fmap,
                                   mrss_t **xml)
{
    const char *err_kind, *err_reason;
    mrss_error_t mrss_err;

    /* The document is parsed straight from the memory buffer. */
    mrss_err = mrss_parse_buffer(fmap->memory, fmap->size, xml);
    if (!mrss_err)
        return ps_success;

    switch (mrss_err) {
    case MRSS_OK:
    case MRSS_ERR_DOWNLOAD:
        /* The download facilities embedded in libmrss are not in use, hence
         * this error is not expected to show up. */
        panic("unexpected mrss_error_t");

    case MRSS_ERR_POSIX:
        err_kind = "errno";
        err_reason = strerror(errno);
        break;
    default:
        err_kind = "mrss";
        err_reason = mrss_strerror(mrss_err);
    }

    warnx("failed parsing of %s (url: %s, kind: %s, reason: %s)",
        feed_info->name,
        feed_info->config->url,
        err_kind,
        err_reason
    );

    /* Even with MRSS_ERR_POSIX it is hard to tell whether the failure is so
     * critical that no subsequent feed should be processed.  For good
     * measure, assume that only this feed is affected. */
    return ps_fail_feed;
}

/* Compile the output format configured for the feed into the shared ofmt
 * object.
 *
 * Returns ps_success, after flagging the feed as ofmt_loaded.  Upon failure
 * returns ps_fail_critical if the ofmt object reports a system failure, and
 * ps_fail_feed otherwise. */
static process_status_t load_outfmt(feed_info_t *feed_info)
{
    /* Handler kind, as found in the configuration -> ofmt mode */
    static const ofmt_mode_t ofmt_mode_of[] = {
        [rc_hdl_pipe] = ofmt_mode_pipe,
        [rc_hdl_print] = ofmt_mode_print,
        [rc_hdl_exec] = ofmt_mode_exec,
    };
    const rc_feed_t *feed_rc = feed_info->config;
    ofmt_t *shared_ofmt = feed_info->global_ctx->ofmt;
    int compile_err;

    compile_err = ofmt_compile(shared_ofmt,
                               ofmt_mode_of[feed_rc->handler],
                               feed_rc->ofmt.spec,
                               feed_rc->ofmt.len);
    if (compile_err == 0) {
        feed_info->ofmt_loaded = true;
        return ps_success;
    }

    warnx("failed to interpret the output format of %s", feed_info->name);
    if (info_enabled)
        ofmt_print_error(shared_ofmt);

    return ofmt_get_error(shared_ofmt)->reason == ofmt_fail_system
         ? ps_fail_critical
         : ps_fail_feed;
}

/* Identifier of a feed item: the guid if available, the link otherwise.
 * The result is NULL if the item has neither. */
static const char *x_item_guid(const mrss_item_t *x_item)
{
    if (x_item->guid)
        return x_item->guid;

    return x_item->link;
}

static process_status_t process_item_print(const feed_info_t *feed_info,
                                           const mrss_item_t *x_item,
                                           const placeholder_extra_t *extra_data)
{
    char buffer[256];

    printf("ITEM: %s\n", x_item_guid(x_item));
    printf(" incremental_id: %u\n", extra_data->incremental_id);

    #define print_if(fmt, val) if (val) printf(fmt, val)
    print_if(" title: %s\n", x_item->title);
    print_if(" title_type: %s\n", x_item->title_type);
    print_if(" link: %s\n", x_item->link);
    print_if(" description: %s\n", text_short(
        buffer,
        sizeof(buffer),
        x_item->description,
        NULL
    ));
    print_if(" description_type: %s\n", x_item->description_type);
    print_if(" copyright: %s\n", x_item->copyright);
    print_if(" copyright_type: %s\n", x_item->copyright_type);
    print_if(" author: %s\n", x_item->author);
    print_if(" author_url: %s\n", x_item->author_uri);
    print_if(" author_email: %s\n", x_item->author_email);
    print_if(" contributor: %s\n", x_item->contributor);
    print_if(" contributor_uri: %s\n", x_item->contributor_uri);
    print_if(" contributor_email: %s\n", x_item->contributor_email);
    print_if(" comments: %s\n", x_item->comments);
    print_if(" pubDate: %s\n", x_item->pubDate);
    print_if(" guid: %s\n", x_item->guid);
    print_if(" guid_isPermaLink: %d\n", x_item->guid_isPermaLink);
    print_if(" source: %s\n", x_item->source);
    print_if(" source_url: %s\n", x_item->source_url);
    print_if(" enclosure: %s\n", x_item->enclosure);
    print_if(" enclosure_url: %s\n", x_item->enclosure_url);
    print_if(" enclosure_length: %d\n", x_item->enclosure_length);
    print_if(" enclosure_type: %s\n", x_item->enclosure_type);
    #undef print_if

    return ps_success;
}

/* Run the handler of the feed on an item that was not seen before.
 *
 * A feed using the print handler without an output format gets the built-in
 * dump of process_item_print().  In any other case the output format of the
 * feed is compiled (only if not done already) and evaluated against the
 * item.
 *
 * Returns ps_success, or the failure reported by load_outfmt(), or
 * ps_fail_item if the evaluation of the output format fails. */
static process_status_t process_item(feed_info_t *feed_info,
                                     const mrss_item_t *parsed_item,
                                     const placeholder_extra_t *extra_data)
{
    const rc_feed_t *feed_rc = feed_info->config;
    ctx_t *ctx;

    if (feed_rc->handler == rc_hdl_print && feed_rc->ofmt.spec == NULL)
        return process_item_print(feed_info, parsed_item, extra_data);

    if (!feed_info->ofmt_loaded) {
        process_status_t load_status = load_outfmt(feed_info);

        if (load_status != ps_success)
            return load_status;
    }

    ctx = feed_info->global_ctx;
    placeholders_set_extra(ctx->ofmt, extra_data);

    ofmt_evaluate_params_t eval_params = {
        .item = (mrss_t *)parsed_item,
        .dry_run = ctx->opts.dry_exec,
        .opt_subproc_chdir = feed_info->config->chdir,
    };

    if (ofmt_evaluate(ctx->ofmt, &eval_params) != 0) {
        warnx("failed to evaluate the output format of %s", feed_info->name);
        if (info_enabled)
            ofmt_print_error(ctx->ofmt);

        return ps_fail_item;
    }

    return ps_success;
}

/* Process a downloaded feed: parse the document mapped as fmap, run the
 * handler on the items that were not seen in previous executions, and
 * update the persisted set of known items.
 *
 * The set of known items is rebuilt from scratch at every execution: it
 * contains the items of the current document that were already known, plus
 * the unseen items that were handled successfully (or all the unseen items,
 * in catch-up mode).  Items that are no longer part of the document are
 * thereby forgotten.
 *
 * Returns:
 * - ps_success if the feed was processed; failures on individual items do
 *   not prevent this, and are counted in the global context;
 * - ps_fail_feed if the document cannot be parsed, if the persist_file
 *   cannot be written, or if process_item() reports it;
 * - ps_fail_critical on allocation-like failures, or if process_item()
 *   reports it.
 */
static process_status_t process_feed(feed_info_t *feed_info,
                                     const filemap_t *fmap)
{
    process_status_t pstatus;
    items_map_t imap = {};
    mrss_t *xml = NULL;
    unsigned incremental_id;
    bool catch_up = feed_info->global_ctx->opts.catch_up;
    persist_items_t *old_items = NULL, *new_items = NULL;
    unsigned seen_before = 0;

    info("processing %s", feed_info->name);

    new_items = persist_items_new(0);
    if (!new_items) {
        pstatus = ps_fail_critical;
        goto exit;
    }

    /* Take the previously known items out of the persist_file.
     * If old_items is NULL, this feed is processed for the first time. */
    old_items = persist_file_swap_items(feed_info->persist_file, NULL);

    /* Index the known items by identifier.  Without old_items the map stays
     * empty, and every item of the document is regarded as unseen. */
    if (old_items && items_map_init(&imap, old_items)) {
        pstatus = ps_fail_critical;
        goto exit;
    }

    pstatus = parse_feed(feed_info, fmap, &xml);
    if (pstatus != ps_success)
        goto exit;

    incremental_id = persist_file_get_incrid(feed_info->persist_file);
    debug(" loading from XML");
    for (mrss_item_t *x_item = xml->item; x_item; x_item = x_item->next) {
        const char *uid;
        size_t uid_len;
        feed_item_t *f_item;

        /* NOTE(review): same expression as in x_item_guid(). */
        uid = x_item->guid ?: x_item->link;
        if (!uid) {
            notice("Bad item in %s: missing guid and link", feed_info->name);
            continue;
        }
        uid_len = strlen(uid);

        HASH_FIND(hh, imap.map, uid, uid_len, f_item);
        debug(" %c item guid \"%s\"", f_item ? '=' : '+', uid);

        if (f_item) {
            /* Seen before.  The item is added to the new set of items, as it
             * has not been removed from the feed XML. */
            seen_before ++;
            if (persist_items_add(new_items, &f_item->persist_item)) {
                pstatus = ps_fail_critical;
                goto exit;
            }
            continue;
        }

        /* Unseen item: run the handler on it, unless in catch-up mode. */
        if (!catch_up) {
            pstatus = process_item(feed_info, x_item,
                &(placeholder_extra_t){
                    .incremental_id = incremental_id,
                    .feed_title = xml->title,
                    .feed_identifier = feed_info->name,
                }
            );

            switch (pstatus) {
            case ps_success:
                break;
            case ps_fail_critical:
            case ps_fail_feed:
                goto exit;
            case ps_fail_item:
                /* The item is not recorded as known, so it shows up as
                 * unseen again at the next execution. */
                feed_info->global_ctx->failures ++;
                continue;
            }
        }

        /* Record the item as known.  The identifier points into the parsed
         * XML (presumably the reason why the XML is handed over to
         * new_items below -- confirm against persist_items). */
        if (persist_items_add(new_items, &(persist_item_t){
                .data = (void *)uid,
                .len = uid_len,
                })) {
            pstatus = ps_fail_critical;
            goto exit;
        }

        /* The incremental id advances only for the items recorded here. */
        incremental_id ++;
    }

    /* Transfer ownership of the parsed xml to new_items */
    persist_items_set_store(new_items, xml, (void (*)(void *))mrss_free);
    xml = NULL;

    if (info_enabled) {
        unsigned unseen;

        /* new_items = items seen before + unseen items recorded above */
        unseen = persist_items_size(new_items) - seen_before;

        if (debug_enabled)
            debug(" updating with %u new items", unseen);
        else if (unseen > 0)
            info("updates for %s: %u items", feed_info->name, unseen);
    }

    /* Transfer ownership of new_items to the persist_file */
    (void)persist_file_swap_items(feed_info->persist_file, new_items);
    new_items = NULL;

    debug(" incremental id is %u", incremental_id);
    persist_file_set_incrid(feed_info->persist_file, incremental_id);

    /* With the dry_mark option the updated state is not written out. */
    bool skip_write = feed_info->global_ctx->opts.dry_mark;
    if (!skip_write && persist_file_write(feed_info->persist_file)) {
        pstatus = ps_fail_feed;
        goto exit;
    }

    pstatus = ps_success;
exit:
    /* xml and new_items are NULL here if their ownership was transferred. */
    if (xml)
        mrss_free(xml);
    items_map_free(&imap);
    persist_items_del(old_items);
    persist_items_del(new_items);

    return pstatus;
}

/* Download callback, registered by schedule_feed_download().  opaque is the
 * feed_info_t of the feed whose download terminated.
 *
 * A completed download is handed over to process_feed(); any other outcome
 * is reported, and regarded as a failure of the feed, or as a critical
 * failure in the case of dl_sys_failure.
 *
 * Returns -1 on critical failure, 0 otherwise.  Only the -1 is going to
 * interrupt all the downloads. */
static int on_downloaded(void *opaque,
                         download_status_t dstatus,
                         const char *errmsg,
                         const filemap_t *file_map)
{
    feed_info_t *feed_info = opaque;
    process_status_t outcome = ps_fail_feed;

    switch (dstatus) {
    case dl_complete:
        outcome = process_feed(feed_info, file_map);
        break;

    case dl_sys_failure:
        outcome = ps_fail_critical;
        notice("failed to download %s: %s", feed_info->name, errmsg);
        break;

    case dl_fetch_failure:
        notice("failed to download %s: %s", feed_info->name, errmsg);
        break;

    case dl_aborted:
        notice("aborted download of %s (url: %s)",
            feed_info->name,
            feed_info->config->url
        );
        break;
    }

    if (outcome == ps_fail_critical)
        return -1;

    if (outcome == ps_fail_feed)
        feed_info->global_ctx->failures ++;

    return 0;
}

/* Schedule the retrieval of the feed url on the global downloader.  The
 * outcome of the download is delivered to on_downloaded(), together with
 * feed_info.
 *
 * Returns the value returned by download_schedule(). */
static int schedule_feed_download(feed_info_t *feed_info)
{
    const rc_feed_t *feed_rc = feed_info->config;
    download_t *dload = feed_info->global_ctx->dload;

    debug("schedule retrieval of url %s for %s",
        feed_rc->url,
        feed_info->name
    );

    return download_schedule(dload, feed_rc->url, on_downloaded, feed_info);
}

/* Main procedure: build the feed map, reconcile it with the persisted
 * state, schedule the download of every enabled feed, and perform the
 * downloads (the feeds are processed as the downloads terminate, see
 * on_downloaded).
 *
 * Returns 0 if everything succeeded, -1 on setup or download failure, and
 * also if any failure was counted in ctx->failures. */
static int run(ctx_t *ctx)
{
    feed_map_t feed_map;
    int result = -1;

    if (feed_map_init(&feed_map, ctx) != 0)
        return -1;

    if (sync_persist_dir(ctx, &feed_map) != 0)
        goto cleanup;

    for (unsigned i = 0; i < feed_map.size; i++) {
        feed_info_t *feed = feed_map.slots + i;

        if (!feed->enabled)
            continue;

        /* No persist_file yet?  This happens when a feed is handled for the
         * first time: it is listed in the configuration, but the
         * persist_dir did not yield a persist_file for it. */
        if (!feed->persist_file && feed_info_set_persist(feed)) {
            notice("skipping %s", feed->name);
            continue;
        }

        info("scheduling fetch for %s", feed->name);

        if (schedule_feed_download(feed) != 0)
            goto cleanup;
    }

    if (download_perform(ctx->dload) != 0)
        goto cleanup;

    result = (ctx->failures == 0) ? 0 : -1;

cleanup:
    feed_map_free(&feed_map);
    return result;
}

int main(int argc, char **argv)
{
    int e = -1;
    rc_t *rc = NULL;
    download_t *dload = NULL;
    ofmt_t *ofmt = NULL;
    opts_t opts;
    persist_dir_t *persist_dir = NULL;

#ifdef HAVE_PLEDGE
    if (pledge("stdio rpath wpath cpath inet dns proc exec", NULL) == -1)
        err(EXIT_FAILURE, "pledge");
#endif

    opts = read_opts(argc, argv);

    rc = rc_new();
    if (!rc)
        goto exit;

    if (rc_load(rc))
        goto exit;

    ofmt = setup_ofmt();
    if (!ofmt)
        goto exit;

    dload = download_new(true, rc_get_global(rc)->jobs);
    if (!dload)
        goto exit;

    persist_dir = setup_persist(rc);
    if (!persist_dir)
        goto exit;

    e = run(&(ctx_t){
        .rc = rc,
        .dload = dload,
        .ofmt = ofmt,
        .opts = opts,
        .persist_dir = persist_dir,
    });
exit:
    persist_dir_del(persist_dir);
    download_del(dload);
    ofmt_del(ofmt);
    rc_free(rc);
    return e ? EXIT_FAILURE : EXIT_SUCCESS;
}
