#include <sys/socket.h>
#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "https.h"
#include "util.h"
#include "youtube.h"
#include "xml.h"

/* expands to two arguments: the string literal s followed by its length
   without the terminating NUL byte */
#define STRP(s) s,sizeof(s)-1

/* scratch buffers filled by the attribute callback: id collects the href of
   the link inside the title heading, userid the href of the link inside the
   byline. Both are evaluated and cleared again in xmltagend() */
static char id[256], userid[256];

/* states */
/* number of <li> start tags seen since the current meta info list started */
static int metainfocount;
/* bit flags describing which parts of the page the parser is currently
   inside; several flags can be set at the same time */
static enum ItemState {
	None  = 0,
	Item  = 1, Pager = 2,
	Metainfo = 4, Title = 8, User = 16, Videotime = 32,
} state;

/* items of the response that is being filled by the parser callbacks */
static struct item *videos;
/* index of the item currently being filled, which is also the number of
   items completed so far */
static size_t nvideos;

/* Request `path` from the YouTube web host, sending a fixed User-Agent
   header. Returns whatever request() returns for it. */
static char *
youtube_request(const char *path)
{
	char *response;

	response = request("www.youtube.com", path,
	                   "User-Agent: "
	                   "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
	                   "\r\n");

	return response;
}

/* Check whether the whitespace-separated class list `classes` contains the
   class `clss` as a whole token.
   `len` must be the length of `clss` without the terminating NUL byte.
   Returns 1 on a match, 0 otherwise. */
static int
isclassmatch(const char *classes, const char *clss, size_t len)
{
	const char *p;

	/* look at every occurrence, not only the first one: the first hit can be
	   a mere prefix of a longer class name ("yt-lockup-tile") while the
	   class itself follows later in the list */
	for (p = classes; (p = strstr(p, clss)); p++) {
		if ((p == classes || isspace((unsigned char)p[-1])) &&
		    (isspace((unsigned char)p[len]) || !p[len]))
			return 1;
		/* only reachable with an empty clss: do not step past the end */
		if (!*p)
			break;
	}

	return 0;
}

/* XML/HTML entity conversion: returns the converted text when
   xml_entitytostr() reports a positive length, else the input `s` itself.
   The converted text is kept in a static buffer, so it is overwritten by the
   next call. */
static const char *
entitytostr(const char *s)
{
	static char decoded[16];
	ssize_t n;

	n = xml_entitytostr(s, decoded, sizeof(decoded));

	return n > 0 ? decoded : s;
}

/* Attribute callback: called with tag name `t`, attribute name `a` and
   attribute value `v`. Class attributes switch the parser state flags and
   start/finish result items; other attributes are collected into the item
   currently being filled, videos[nvideos], or into the id/userid scratch
   buffers. The length parameters and `x` are unused. */
static void
xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
        const char *v, size_t vl)
{
	/* grouped channel index, used for channelid and channel title */
	static int grouped = -1;

	if (!strcmp(t, "div") && !strcmp(a, "class") && isclassmatch(v, STRP("search-pager"))) {
		/* last video */
		if (nvideos < MAX_VIDEOS && videos[nvideos].linktype) {
			if (grouped != -1 && !videos[nvideos].channelid[0]) {
				strlcpy(videos[nvideos].channelid, videos[grouped].channelid, sizeof(videos[nvideos].channelid));
				strlcpy(videos[nvideos].channeltitle, videos[grouped].channeltitle, sizeof(videos[nvideos].channeltitle));
			}
			nvideos++;
		}
		state &= ~Item;
		state |= Pager;
	}

	if (nvideos >= MAX_VIDEOS)
		return;

	if (!strcmp(t, "div") && !strcmp(a, "class") &&
		isclassmatch(v, STRP("yt-lockup"))) {
		state |= Item;
		/* nothing has been parsed yet: drop a group index that may be left
		   over from an earlier parse, it would refer to the wrong items */
		if (!nvideos && !videos[nvideos].linktype)
			grouped = -1;
		/* a new item starts: finish the previous one, if there is any */
		if (videos[nvideos].linktype) {
			if (videos[nvideos].channelid[0] || videos[nvideos].userid[0] ||
			    videos[nvideos].linktype != Video)
				grouped = -1;
			if (videos[nvideos].linktype == Channel)
				grouped = nvideos;
			/* video without own channel info: inherit it from the group */
			if (grouped != -1 && !videos[nvideos].channelid[0]) {
				strlcpy(videos[nvideos].channelid, videos[grouped].channelid, sizeof(videos[nvideos].channelid));
				strlcpy(videos[nvideos].channeltitle, videos[grouped].channeltitle, sizeof(videos[nvideos].channeltitle));
			}
			nvideos++;
			/* the list is full now: there is no slot for the item that
			   starts here, so it must not be touched */
			if (nvideos >= MAX_VIDEOS)
				return;
		}
		if (strstr(v, " yt-lockup-channel "))
			videos[nvideos].linktype = Channel;
		else if (strstr(v, "yt-lockup-movie-"))
			videos[nvideos].linktype = Movie;
		else if (strstr(v, " yt-lockup-playlist "))
			videos[nvideos].linktype = Playlist;
		if (strstr(v, " yt-lockup-video "))
			videos[nvideos].linktype = Video;
	}
	if (!(state & Item))
		return;

	/* class attributes marking the sub-sections of an item */
	if (!strcmp(t, "span") && !strcmp(a, "class") && isclassmatch(v, STRP("video-time")))
		state |= Videotime;
	if (!strcmp(t, "ul") && !strcmp(a, "class") && isclassmatch(v, STRP("yt-lockup-meta-info"))) {
		state |= Metainfo;
		metainfocount = 0;
	}
	if (!strcmp(t, "h3") && !strcmp(a, "class") && isclassmatch(v, STRP("yt-lockup-title")))
		state |= Title;
	if (!strcmp(t, "div") && !strcmp(a, "class") && isclassmatch(v, STRP("yt-lockup-byline")))
		state |= User;

	/* values are appended (strlcat): one attribute value can arrive in
	   several pieces, see xmlattrentity() */
	if ((state & Title) && !strcmp(t, "a") && !strcmp(a, "title")) {
		if (videos[nvideos].linktype == Channel)
			strlcat(videos[nvideos].channeltitle, v, sizeof(videos[nvideos].channeltitle));
		else
			strlcat(videos[nvideos].title, v, sizeof(videos[nvideos].title));
	}

	if ((state & Title) && !strcmp(t, "a") && !strcmp(a, "href"))
		strlcat(id, v, sizeof(id));

	if (!strcmp(t, "button") && !strcmp(a, "data-channel-external-id"))
		strlcat(videos[nvideos].channelid, v, sizeof(videos[nvideos].channelid));

	if ((state & User) && !strcmp(t, "a") && !strcmp(a, "href"))
		strlcat(userid, v, sizeof(userid));
}

/* Attribute entity callback: converts the entity `v` to text and hands the
   result to xmlattr() as if it was a regular piece of the attribute value. */
static void
xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
              const char *v, size_t vl)
{
	const char *text;

	/* the list is full and the pager was not seen: skip the conversion */
	if (nvideos >= MAX_VIDEOS && !(state & Pager))
		return;

	text = entitytostr(v);
	xmlattr(x, t, tl, a, al, text, strlen(text));
}

/* Text data callback: appends the text `d` to the field of the current item
   that matches the parser state. `dl` is unused. */
static void
xmldata(XMLParser *x, const char *d, size_t dl)
{
	/* nothing to collect: inside the pager, outside of every tracked section
	   or no free item left (which must not be written to) */
	if ((state & Pager) || !state || nvideos >= MAX_VIDEOS)
		return;

	/* the meaning of the meta info entries depends on the parsed link type:
	   playlists have none that is of interest, channels have their video
	   count and everything else a publish date followed by a view count */
	if (state & Metainfo) {
		if (videos[nvideos].linktype == Channel) {
			if (metainfocount == 1)
				strlcat(videos[nvideos].channelvideos, d, sizeof(videos[nvideos].channelvideos));
		} else if (videos[nvideos].linktype != Playlist) {
			switch (metainfocount) {
			case 1:
				strlcat(videos[nvideos].publishedat, d, sizeof(videos[nvideos].publishedat));
				break;
			case 2:
				strlcat(videos[nvideos].viewcount, d, sizeof(videos[nvideos].viewcount));
				break;
			default:
				break;
			}
		}
	}

	if ((state & Videotime) && strcmp(x->tag, "span") == 0)
		strlcat(videos[nvideos].duration, d, sizeof(videos[nvideos].duration));

	if ((state & User) && strcmp(x->tag, "a") == 0)
		strlcat(videos[nvideos].channeltitle, d, sizeof(videos[nvideos].channeltitle));
}

/* Data entity callback: converts the entity `d` to text and passes the
   result on to xmldata(). `dl` is unused. */
static void
xmldataentity(XMLParser *x, const char *d, size_t dl)
{
	const char *text;

	/* optimization: xmldata() would ignore the text anyway */
	if (nvideos >= MAX_VIDEOS || !state)
		return;

	text = entitytostr(d);
	xmldata(x, text, strlen(text));
}

/* End tag callback: leaves the sections that end with tag `t` and evaluates
   the links collected in the id/userid scratch buffers for the current item.
   `x`, `tl` and `isshort` are unused. */
static void
xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
{
	static const char watchpath[] = "/watch";
	static const char channelpath[] = "/channel/";
	static const char userpath[] = "/user/";
	char *param;

	/* clearing a flag that is not set changes nothing */
	if (strcmp(t, "ul") == 0)
		state &= ~Metainfo;

	if ((state & Title) && strcmp(t, "h3") == 0) {
		state &= ~Title;

		if (nvideos >= MAX_VIDEOS)
			return;

		/* title link to a video: take the video id from the "v" parameter */
		if (strncmp(id, watchpath, sizeof(watchpath) - 1) == 0) {
			if (!videos[nvideos].linktype)
				videos[nvideos].linktype = Video;
			param = getparam(id, "v");
			if (param &&
			    decodeparam(videos[nvideos].id, sizeof(videos[nvideos].id), param) == -1)
				videos[nvideos].id[0] = '\0';
		}

		id[0] = '\0';
	}

	/* the byline section is left at the first end tag of any kind */
	if (state & User) {
		state &= ~User;

		if (nvideos >= MAX_VIDEOS)
			return;

		/* the link can point to a user or to a channel */
		if (strncmp(userid, channelpath, sizeof(channelpath) - 1) == 0)
			strlcpy(videos[nvideos].channelid,
			        userid + sizeof(channelpath) - 1,
			        sizeof(videos[nvideos].channelid));
		else if (strncmp(userid, userpath, sizeof(userpath) - 1) == 0)
			strlcpy(videos[nvideos].userid,
			        userid + sizeof(userpath) - 1,
			        sizeof(videos[nvideos].userid));

		userid[0] = '\0';
	}

	state &= ~Videotime;
}

/* Start tag callback: counts the <li> entries of the meta info list that is
   currently being parsed. `x` and `tl` are unused. */
static void
xmltagstart(XMLParser *x, const char *t, size_t tl)
{
	if (!(state & Metainfo))
		return;

	if (strcmp(t, "li") == 0)
		metainfocount++;
}

/* Build the request path for a search and request it.
   `s` is the search string; `chan` or `user` restrict the search to a channel
   or user when non-empty (user takes precedence); `page` and `order` add the
   page and sort order parameters when non-empty. All values are put into the
   path as they are.
   Returns the result of youtube_request() or NULL if the path is too long. */
static char *
request_search(const char *s, const char *chan, const char *user,
               const char *page, const char *order)
{
	/* accepted values of `order` and the parameter value sent for them */
	static const struct {
		const char *name;
		const char *value;
	} sorts[] = {
		{ "date",      "video_date_uploaded" },
		{ "relevance", "video_relevance" },
		{ "views",     "video_view_count" },
	};
	char path[4096];
	size_t i;

	/* a search in a channel or user with an empty search string:
	   fake a search with a single space */
	if (chan[0] || user[0]) {
		if (!s[0])
			s = "+";
	}

	if (user[0])
		snprintf(path, sizeof(path), "/user/%s/search?query=%s", user, s);
	else if (chan[0])
		snprintf(path, sizeof(path), "/channel/%s/search?query=%s", chan, s);
	else
		snprintf(path, sizeof(path), "/results?search_query=%s", s);

	if (page[0]) {
		strlcat(path, "&page=", sizeof(path));
		strlcat(path, page, sizeof(path));
	}

	if (order[0]) {
		/* the parameter name is always added, its value only for a known
		   order */
		strlcat(path, "&search_sort=", sizeof(path));
		for (i = 0; i < sizeof(sorts) / sizeof(sorts[0]); i++) {
			if (strcmp(order, sorts[i].name) == 0) {
				strlcat(path, sorts[i].value, sizeof(path));
				break;
			}
		}
	}

	/* force the older layout: the new layout is served at random otherwise
	   and breaks the parsing */
	strlcat(path, "&disable_polymer=1", sizeof(path));

	/* a completely filled buffer is treated as a truncated (too long) path */
	if (strlen(path) >= sizeof(path) - 1)
		return NULL;

	return youtube_request(path);
}

struct search_response *
youtube_search(const char *rawsearch, const char *chan, const char *user,
               const char *page, const char *order)
{
	struct search_response *r;
	XMLParser x = { 0 };
	char *data, *s;

	if (!(data = request_search(rawsearch, chan, user, page, order)))
		return NULL;
	if (!(s = strstr(data, "\r\n\r\n")))
		return NULL; /* invalid response */
	/* skip header */
	s += strlen("\r\n\r\n");

	if (!(r = calloc(1, sizeof(*r))))
		return NULL;

	nvideos = 0;
	videos = r->items;

	x.xmlattr = xmlattr;
	x.xmlattrentity = xmlattrentity;
	x.xmldata = xmldata;
	x.xmldataentity = xmldataentity;
	x.xmltagend = xmltagend;
	x.xmltagstart = xmltagstart;

	setxmldata(s, strlen(s));
	xml_parse(&x);

	r->nitems = nvideos;

	return r;
}
