/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * Author: Charles Kerr <charles@rebelbase.com>
 *
 * Copyright (C) 2000, 2001  Pan Development Team <pan@rebelbase.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * 
 */

#include <config.h>

#include <glib.h>

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <pan/base/article.h>
#include <pan/base/article-thread.h>
#include <pan/base/base-prefs.h>
#include <pan/base/debug.h>
#include <pan/base/pan-i18n.h>
#include <pan/base/pan-glib-extensions.h>

/**
 * Skip the "Re: " part of a subject header, if any.
 *
 * The leader is matched without regard to case ("Re: ", "RE: ", "re: ",
 * "rE: ") and must include the trailing space.  A NULL argument is
 * passed through unchanged.
 *
 * NOTE: this is a macro that evaluates its argument several times, so
 * never pass an expression with side effects.  The argument is fully
 * parenthesized, so pointer expressions such as `p + 2' are safe.
 *
 * @param a the subject string, or NULL
 * @return the non-"Re:" portion of the subject header
 */
#define skip_reply_leader(a) \
	((((a)!=NULL) && \
	  ((a)[0]=='R' || (a)[0]=='r') && \
	  ((a)[1]=='E' || (a)[1]=='e') && \
	  ((a)[2]==':') && \
	  ((a)[3]==' ')) ? (a)+4 : (a))

/**
 * Normalized Article: an article paired with the normalized form of its
 * subject, so that comparisons can use plain strcmp().
 */
typedef struct {
	gchar * subject;    /* normalized subject text, as written by normalize_subject() */
	gboolean is_reply;  /* nonzero when the raw subject carried a "Re: " leader */
	Article * a;        /* the article this record describes */
} Norm;

/**
 * Normalizing a subject header involves tearing out the multipart
 * substrings ("(21/42)" or "[12|213]") and converting it all to
 * one case so that we can use strcmp instead of g_strcasecmp.
 *
 * When we're threading articles, it's a big speedup to normalize the
 * subjects at the outset instead of normalizing them in each comparison.
 *
 * Only alphanumeric and whitespace characters survive; everything else
 * is dropped, and 'A'..'Z' are folded to lower case.  A leading "Re: "
 * and any leading non-alphanumeric noise are skipped first.
 *
 * @param buf output buffer.  The result is never longer than a->subject,
 *            so strlen(a->subject)+1 bytes is always enough.
 * @param a   the article whose subject is normalized; a->subject must
 *            not be NULL.
 * @return buf
 */
static gchar*
normalize_subject (gchar * buf, const Article * a)
{
	const gboolean multipart = a->parts != 0;
	const gchar * in = skip_reply_leader (a->subject);
	gchar * out = buf;

	/* skip the leading noise.
	   The <ctype.h> functions are only defined for values representable
	   as unsigned char (or EOF), so plain chars are cast before use. */
	while (*in && !isalnum((unsigned char)*in))
		++in;

	while (*in)
	{
		/* strip multipart information: jump to the closing bracket,
		   which the filter below then discards on the next pass */
		if (multipart && (*in=='('||*in=='[') && isdigit((unsigned char)in[1])) {
			const char ch = *in=='(' ? ')' : ']';
			while (*in && *in!=ch)
				++in;
			continue;
		}

		/* strip out junk that breaks sorting  */
		if (isalnum((unsigned char)*in) || isspace((unsigned char)*in))
			*out++ = ('A'<=*in && *in<='Z') ? tolower(*in) : *in;

		++in;
	}

	*out = '\0';
	return buf;
}


/**
 * Builds a Norm record for every article using just two heap blocks:
 * one array of Norm structs and one character buffer that holds all of
 * the normalized subjects back to back.
 *
 * Both blocks are handed to the caller through the out-parameters and
 * must be g_free()d when the caller is done with them.  If any sanity
 * check fails, nothing is allocated and the out-parameters are untouched.
 */
static void
normalize_articles (Article    ** articles,
                    gint          qty,
                    Norm       ** alloc_and_setme_norm,
                    gchar      ** alloc_and_setme_str)
{
	gint idx;
	glong bytes_needed = 0;
	gchar * cursor;
	Norm * norms;

	/* sanity clause */
	g_return_if_fail (articles!=NULL);
	g_return_if_fail (qty>0);
	g_return_if_fail (alloc_and_setme_norm!=NULL);
	g_return_if_fail (alloc_and_setme_str!=NULL);

	/* one Norm per article */
	norms = g_new (Norm, qty);
	*alloc_and_setme_norm = norms;

	/* a normalized subject is never longer than the raw one,
	   so the raw lengths give a safe upper bound on the text buffer */
	for (idx=0; idx<qty; ++idx)
		bytes_needed += strlen (articles[idx]->subject) + 2;
	cursor = g_new (char, bytes_needed);
	*alloc_and_setme_str = cursor;

	/* fill in the records, packing the subjects one after another */
	for (idx=0; idx<qty; ++idx) {
		Article * article = articles[idx];
		Norm * n = &norms[idx];

		n->a = article;
		n->is_reply = skip_reply_leader (article->subject) != article->subject;
		n->subject = normalize_subject (cursor, article);
		cursor += strlen (n->subject) + 1;
	}
}

/**
 * Comparison callback ordering two Norm records.
 *
 * Keys, in order: normalized subject, non-replies before replies,
 * part number, then date (oldest first).
 *
 * @param va pointer to a Norm
 * @param vb pointer to a Norm
 * @return negative, zero or positive as va sorts before, with or after vb
 */
static int
compare_pN_to_pN_by_subject (const void * va, const void * vb)
{
	const Norm * a = (const Norm *)va;
	const Norm * b = (const Norm *)vb;
	int value;
	double age_gap;

	/* subject is the primary key, of course...
	   the first-character test is a cheap shortcut before strcmp */
	if ((value = *a->subject - *b->subject))
		return value;
	if ((value = strcmp (a->subject, b->subject)))
		return value;

	/* if one but not both is a reply, the reply goes second */
	if (a->is_reply != b->is_reply)
		return a->is_reply ? 1 : -1;

	/* check multipart */
	if ((value = a->a->part - b->a->part))
		return value;

	/* oldest goes first...
	   Only the sign is returned: casting the raw difference to int is
	   out of range once two dates are more than INT_MAX seconds apart. */
	age_gap = difftime (a->a->date, b->a->date);
	if (age_gap < 0)
		return -1;
	if (age_gap > 0)
		return 1;
	return 0;
}


/**
 * Line count of an article.  When the article belongs to a multipart
 * (parts != 0), the line counts of the articles in its threads list
 * are added in.
 */
static long
linecount_including_threads (const Article * a)
{
	long loc = a->linecount;

	if (a->parts != 0) {
		const GSList * l;
		for (l=a->threads; l!=NULL; l=l->next)
			loc += ARTICLE(l->data)->linecount;
	}

	return loc;
}

/**
 * Comparison callback ordering two Article pointers by line count,
 * smallest first.
 *
 * @param va pointer to an Article pointer
 * @param vb pointer to an Article pointer
 * @return -1, 0 or 1
 */
static int
compare_ppA_to_ppA_by_linecount (const void* va, const void* vb)
{
	const Article * a = *(const Article**)va;
	const Article * b = *(const Article**)vb;
	const long a_loc = linecount_including_threads (a);
	const long b_loc = linecount_including_threads (b);

	/* return the sign rather than the raw difference, which may not
	   survive the narrowing from long to int */
	return (a_loc > b_loc) - (a_loc < b_loc);
}

static int
compare_ppA_to_ppA_by_action (const void * va, const void * vb)
{
	const Article * a = *(const Article **)va;
	const Article * b = *(const Article **)vb;
	gint ia, ib;

	ia = article_flag_on (a, STATE_SAVE_QUEUED) ? 1 : 0;
	ib = article_flag_on (b, STATE_SAVE_QUEUED) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	ia = article_flag_on (a, STATE_DOWNLOAD_FLAGGED) ? 1 : 0;
	ib = article_flag_on (b, STATE_DOWNLOAD_FLAGGED) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	ia = article_flag_on (a, STATE_DECODED) ? 1 : 0;
	ib = article_flag_on (b, STATE_DECODED) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	ia = article_has_body (a) ? 1 : 0;
	ib = article_has_body (b) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	return 0;
}

static int
compare_ppA_to_ppA_by_read (const void * va, const void * vb)
{
	const Article * a = *(const Article **)va;
	const Article * b = *(const Article **)vb;
	const gboolean a_is_read = article_is_read (a);
	const gboolean b_is_read = article_is_read (b);
	const gboolean a_is_new = article_is_new (a);
	const gboolean b_is_new = article_is_new (b);
	gint ia, ib;

	ia = (a_is_new?1:0) + a->new_children;
	ib = (b_is_new?1:0) + b->new_children;
	if (ia != ib)
		return ib - ia;

	ia = (a_is_read?0:1) + a->unread_children;
	ib = (b_is_read?0:1) + b->unread_children;
	if (ia != ib)
		return ib - ia;

	ia = a_is_read ? 0 : 1;
	ib = b_is_read ? 0 : 1;
	if (ia != ib)
		return ib - ia;

	ia = article_flag_on (a, STATE_MULTIPART_ALL) ? 1 : 0;
	ib = article_flag_on (b, STATE_MULTIPART_ALL) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	ia = article_flag_on (a, STATE_MULTIPART_SOME) ? 1 : 0;
	ib = article_flag_on (b, STATE_MULTIPART_SOME) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	return 0;
}


static int
compare_ppA_to_ppA_by_date (const void* va, const void* vb)
{
	return (int) difftime ((**(const Article**)va).date,
	                       (**(const Article**)vb).date);
}

/**
 * Comparison callback ordering two Article pointers by the strcmp()
 * order of their message-ids.
 */
static int
compare_ppA_to_ppA_by_message_id (const void* a, const void* b)
{
	const Article * lhs = *(const Article**)a;
	const Article * rhs = *(const Article**)b;
	const gchar * lhs_id = article_get_message_id (lhs);
	const gchar * rhs_id = article_get_message_id (rhs);

	return strcmp (lhs_id, rhs_id);
}

/**
 * An article paired with a string sort key.
 */
typedef struct {
	gchar * data;       /* sort key; compared with strcmp() */
	Article * article;  /* the article the key belongs to */
} ArticleStruct;

/**
 * Comparison callback ordering two ArticleStruct records by the
 * strcmp() order of their data strings.
 */
static int
compare_pAS_to_pAS_by_data (const void * va, const void * vb)
{
	const ArticleStruct * lhs = va;
	const ArticleStruct * rhs = vb;

	return strcmp (lhs->data, rhs->data);
}

/**
 * Sort an array of articles in place.
 *
 * @param buf          array of Article pointers to reorder
 * @param article_qty  number of entries in buf
 * @param sort_type    one of the ARTICLE_SORT_* keys; unrecognized values
 *                     fall back to sorting by subject
 * @param ascending    when FALSE, the sorted result is reversed
 */
void
sort_articles (Article      ** buf,
               size_t          article_qty,
               int             sort_type,
               gboolean        ascending)
{
	/* Zero or one article is already in order.  Returning here also keeps
	   an empty array from tripping the qty>0 check in normalize_articles(). */
	if (article_qty < 2)
		return;

	g_return_if_fail (buf!=NULL);

	switch (sort_type)
	{
		case ARTICLE_SORT_AUTHOR:
		{
			/* sort on a lowercased copy of the short author string */
			size_t i;
			ArticleStruct * as = g_new (ArticleStruct, article_qty);
			for (i=0; i<article_qty; ++i)
			{
				gchar author[256];
				article_get_short_author_str (buf[i], author, sizeof(author));
				as[i].data = g_strdup (author);
				as[i].article = buf[i];
				g_strdown (as[i].data);
			}
			msort (as,
			       article_qty,
			       sizeof(ArticleStruct),
			       compare_pAS_to_pAS_by_data);
			for (i=0; i<article_qty; ++i) {
				buf[i] = as[i].article;
				g_free (as[i].data);
			}
			g_free (as);
			break;
		}
		case ARTICLE_SORT_LINES:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_linecount);
			break;
		}
		case ARTICLE_SORT_DATE:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_date);
			break;
		}
		case ARTICLE_SORT_MSG_ID:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_message_id);
			break;
		}
		case ARTICLE_SORT_ACTION_STATE:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_action);
			break;
		}
		case ARTICLE_SORT_READ_STATE:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_read);
			break;
		}
		case ARTICLE_SORT_SUBJECT:
		default:
		{
			/* sort on pre-normalized subjects */
			size_t i;
			Norm * norm_buf = NULL;
			gchar * str_buf = NULL;
			normalize_articles (buf, article_qty, &norm_buf, &str_buf);
			/* normalize_articles() leaves norm_buf NULL if it bails out;
			   in that case leave buf as it was */
			if (norm_buf != NULL) {
				msort (norm_buf, article_qty, sizeof(Norm), compare_pN_to_pN_by_subject);
				for (i=0; i<article_qty; ++i)
					buf[i] = ARTICLE(norm_buf[i].a);
			}
			g_free (norm_buf);
			g_free (str_buf);
			break;
		}
	}

	/* if not ascending, reverse the order */
	if (!ascending) {
		const size_t mid = article_qty/2;
		size_t i;
		for (i=0; i!=mid; ++i) { /* swap */
			Article * tmp = buf[i];
			buf[i] = buf[article_qty-1-i];
			buf[article_qty-1-i] = tmp;
		}
	}
}


/**
 * Walks up the parent chain starting at child (child itself included)
 * looking for parent.
 *
 * @return TRUE if parent is child or one of child's ancestors
 */
static gboolean
is_child_of (const Article * child,
             const Article * parent)
{
	const Article * node;

	g_return_val_if_fail (child!=NULL, FALSE);
	g_return_val_if_fail (parent!=NULL, FALSE);

	for (node=child; node!=NULL; node=node->parent)
		if (node == parent)
			return TRUE;

	return FALSE;
}

/**
 * Thread the articles specified in list.
 *
 * Any previous threading is discarded first: each article's parent,
 * threads list and unread/new child counters are reset.  A parent is
 * then looked up for each article, trying in order:
 *   1. the References header, newest message-id first
 *      (only for articles with parts < 2);
 *   2. for parts 2..n of a multipart, a record with the same normalized
 *      subject and the same number of parts;
 *   3. for subjects carrying a "Re: " leader, a record with the same
 *      normalized subject.
 * Finally each article's children are sorted and the unread/new child
 * counters of all its ancestors are recomputed.
 *
 * @param articles array of Article pointers, threaded in place
 * @param status   optional progress sink; may be NULL
 */
void
thread_articles (GPtrArray    * articles,
                 StatusItem   * status)
{
	guint i;
	guint qty;
	Article ** refs;
	Article search_a;
	Article * p_search_a=&search_a;
	gchar * norm_str_buf;
	Norm * norm;
	Norm * sorted_norm;
	GArray * buf = NULL;

	/* check the array before reading its length */
	g_return_if_fail (articles!=NULL);
	qty = articles->len;
	if (qty<1)
		return;

	/* scratch buffer for re-normalizing a candidate parent's subject */
	if (break_thread_when_subject_changes)
		buf = g_array_new (FALSE, FALSE, 1);

	if (status != NULL)
		status_item_emit_status_va (status, _("Threading %u articles"), qty);

	/* make a plausibly-legal article to use as the search key */
	search_a.number = 1;
	search_a.subject = "dummy subject";
	search_a.references = NULL;

	/* unthread the articles, just in case they were threaded before */
	for (i=0; i!=qty; ++i) {
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		a->parent = NULL;
		a->unread_children = 0;
		a->new_children = 0;
		g_slist_free (a->threads);
		a->threads = NULL;
	}

	/* make a message-id-sorted array of the articles */
	refs = g_memdup (articles->pdata, sizeof(gpointer)*qty);
	qsort (refs, qty, sizeof(Article*), compare_ppA_to_ppA_by_message_id);

	/* normalize the articles */
	norm = NULL;
	norm_str_buf = NULL;
	normalize_articles ((Article**)articles->pdata, qty, &norm, &norm_str_buf);

	/* sort the normalized articles */
	sorted_norm = g_memdup (norm, sizeof(Norm)*qty);
	qsort (sorted_norm, qty, sizeof(Norm), compare_pN_to_pN_by_subject);

	/* thread the articles */
	for (i=0; i!=qty; ++i)
	{
		const gchar * references;
		Article * parent = NULL;
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		gint index = -1;

		/* let the user know what we're doing */
		if (status != NULL) {
			status_item_emit_next_step (status);
			if (i==qty-1 || !(i % 256))
				status_item_emit_status_va (status,
					_("Threaded %u of %u articles"), i, qty);
		}

		/* thread by reference
		   (except for parts 2...n of multiparts, which need to be threaded by multipart) */
		references = article_get_header (a, HEADER_REFERENCES);
		if (a->parts<2 && references!=NULL && *references=='<')
		{
			/* work on a copy: the string is truncated as we walk backwards */
			gchar * tmp_references = g_strdup (references);
			gchar * message_id = strrchr (tmp_references, '<');

			while (parent==NULL && is_nonempty_string(message_id))
			{
				gboolean exact = FALSE;

				search_a.message_id = message_id;

				index = lower_bound (&p_search_a,
						     refs,
						     qty,
						     sizeof(Article*),
						     compare_ppA_to_ppA_by_message_id,
						     &exact);

				/* if we found the ancestor & it's worthy, thread it */
				if (exact && !is_child_of(refs[index],a))
				{
					gboolean subject_changed = FALSE;

					if (break_thread_when_subject_changes)
					{
						const gchar * new_subject = norm[i].subject;
						const gchar * old_subject = refs[index]->subject;
						/* the normalized form never outgrows the raw subject */
						g_array_set_size (buf, strlen(old_subject)+1);
						normalize_subject (buf->data, refs[index]);
						subject_changed = strcmp (buf->data, new_subject) != 0;
					}

					if (!subject_changed)
						parent = refs[index];
				}

				/* if we couldn't find the ancestor, march up the References string */
				*message_id = '\0';
				g_strchomp (tmp_references);
				message_id = strrchr (tmp_references, '<');
			}

			g_free (tmp_references);
		}


		/* thread by multipart */
		if (!parent && a->parts>1 && a->part>1)
		{
			/* search key: this article's subject, but part 1 and date 0 */
			Norm n = norm[i];
			search_a.part = 1;
			search_a.date = 0; /* unlikely to get an exact match.. :) */
			n.a = &search_a;

			index = lower_bound (&n,
			                     sorted_norm,
			                     qty,
			                     sizeof(Norm),
			                     compare_pN_to_pN_by_subject,
			                     NULL);

			if (0<=index && index<qty)
			{
				Norm * match = &sorted_norm[index];
				if ((match->a != a)
					&& (match->a->parts == a->parts)
					&& (!strcmp(match->subject,n.subject))
					&& (!is_child_of(match->a,a)))
				{
					parent = match->a;
				}
			}
		}

		/* thread by subject */
		if (!parent && skip_reply_leader(a->subject)!=a->subject)
		{
			/* search key: this article's subject, but part 0 and date 0 */
			Norm n = norm[i];
			search_a.part = 0;
			search_a.date = 0; /* unlikely to get an exact match.. :) */
			n.a = &search_a;

			index = lower_bound (
				&n,
				sorted_norm,
				qty,
				sizeof(Norm),
				compare_pN_to_pN_by_subject,
				NULL);

			if (0<=index && index<qty && !is_child_of(sorted_norm[index].a,a))
			{
				Norm * match = &sorted_norm[index];

				if (!strcmp(match->subject,n.subject))
				{
					/* 1 original, 1 reply */
					parent = match->a;
				}
				/* NOTE(review): match->subject is normalized (so it holds no ':')
				   while a->subject still carries its "Re: " leader here, so this
				   comparison looks like it can never succeed -- confirm intent */
				else if (!strcmp(match->subject, a->subject) && difftime(match->a->date,a->date)<0)
				{
					/* 2 replies, no top --  oldest on top */
					parent = match->a;
				}
			}
		}

		if (parent != NULL) /* this article has a parent */
		{
			g_assert (!is_child_of(parent,a));

			/* link the two articles.
			   The child is stored as its Norm for now so that the lists
			   can be sorted by normalized subject below. */
			a->parent = parent;
			parent->threads = g_slist_prepend (parent->threads, norm+i);
		}
	}

	/* right now all the children are normalized; point to articles */
	for (i=0; i!=qty; ++i) {
		GSList * l;
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		a->threads = g_slist_sort (a->threads, compare_pN_to_pN_by_subject);
		for (l=a->threads; l!=NULL; l=l->next)
			l->data = ((Norm*)l->data)->a;
	}

	/* calculate new/unread child counts */
	for (i=0; i!=qty; ++i)
	{
		Article * a;

		/* an unread article bumps the unread count of every ancestor */
		a = ARTICLE(g_ptr_array_index(articles,i));
		if (a->parent!=NULL && !article_is_read(a))
			for (a=a->parent; a!=NULL; a=a->parent)
				++a->unread_children;

		/* a new article bumps the new count of every ancestor */
		a = ARTICLE(g_ptr_array_index(articles,i));
		if (a->parent!=NULL && article_is_new(a))
			for (a=a->parent; a!=NULL; a=a->parent)
				++a->new_children;
	}

	/* cleanup */
	g_free (refs);
	g_free (norm);
	g_free (norm_str_buf);
	g_free (sorted_norm);
	if (buf != NULL)
		g_array_free (buf, TRUE);
}


/**
 * Recompute the multipart state flags of every article in the array.
 *
 * Both STATE_MULTIPART_ALL and STATE_MULTIPART_SOME are cleared first.
 * Only part 1 of a posting with parts != 0 is then flagged: ALL when
 * parts 2..n are found, in order, in its threads list (or when it is a
 * single-part posting), SOME otherwise.
 *
 * @param articles array of Article pointers; their threads lists are
 *                 read as-is, so thread the articles beforehand
 */
void
check_multipart_articles (GPtrArray * articles)
{
	guint i;
	gint j;

	/* entry assertions */
	g_return_if_fail (articles!=NULL);

	/* set the multipart state (all/partial) */
	for (i=0; i!=articles->len; ++i)
	{
		GSList * p;
		Article * a = ARTICLE(g_ptr_array_index(articles,i));

		/* clear old state.
		   The flags must be OR'ed to build the mask: AND'ing two distinct
		   flag bits yields 0, and ~0 would clear nothing. */
		a->state &= ~(STATE_MULTIPART_ALL|STATE_MULTIPART_SOME);

		/* not a multipart because it has no parts */
		if (!a->parts)
			continue;

		/* not a multipart because it's not the first of the set */
		if (a->part!=1)
			continue;

		/* handle the single-part attachment message */
		if (a->parts==1) {
			a->state |= STATE_MULTIPART_ALL;
			continue;
		}

		/* make sure we have each multipart.
		   j is the next part number we expect to see among the children. */
		for (j=a->part+1, p=a->threads;
		     j<=a->parts && p!=NULL;
		     p=p->next)
		{
			Article* b = ARTICLE(p->data);
			if (b->part > j)
				break; /* some */
			else if (b->part == j)
				++j; /* okay so far */
			else
				; /* a repost of a multipart section? */
		}
		if (j==a->parts+1) {
			a->state |= STATE_MULTIPART_ALL;
		} else {
			a->state |= STATE_MULTIPART_SOME;
		}
	}
}
