initial repo, quick hack - sub - subscene.com subtitle search
(HTM) git clone git://git.codemadness.org/sub
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 0f97d1bd0a8f55ffad37d17e5d7080576e6db684
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 19 Oct 2014 12:49:06 +0000
initial repo, quick hack
Diffstat:
A Makefile | 5 +++++
A sub-dl | 11 +++++++++++
A sub-search | 18 ++++++++++++++++++
A sub.c | 170 +++++++++++++++++++++++++++++++
A util.c | 35 +++++++++++++++++++++++++++++++
A util.h | 2 ++
A xml.c | 325 +++++++++++++++++++++++++++++++
A xml.h | 49 +++++++++++++++++++++++++++++++
8 files changed, 615 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+# build: runs "clean" first, then compiles all sources with a single cc call
+build: clean
+ cc xml.c util.c sub.c -o sub
+
+# clean: remove the sub binary and any object files
+clean:
+ rm -f sub *.o
(DIR) diff --git a/sub-dl b/sub-dl
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Fetch the subscene.com subtitle page given as $1, locate its download
+# link and extract the *.srt files of the downloaded archive into the
+# current directory. Exits 1 when the page contains no download link.
+
+# keep only the first link: several matches would end up in one url otherwise
+url=$(curl "$1" | grep -oE '(/subtitle/download\?mac=[^"]*)' | head -n 1)
+if test x"$url" = x""; then
+    exit 1
+fi
+
+url="http://subscene.com${url}"
+# mktemp(1) creates the file under an unpredictable name; a fixed
+# "/tmp/sub.$$.zip" can be pre-created or symlinked by another local user
+file=$(mktemp "${TMPDIR:-/tmp}/sub.XXXXXX") || exit 1
+# remove the downloaded archive when the script exits
+trap 'rm -f "$file"' EXIT
+# only unpack when the download itself succeeded
+curl "${url}" > "$file" && unzip "$file" "*.srt"
(DIR) diff --git a/sub-search b/sub-search
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+usage() {
+ printf 'usage: sub-search [term]\n' >&2
+ exit 1
+}
+
+getep() {
+ printf '%s' "$1" | grep -oE '([0-9]{2}[Ee][0-9]{2})'
+}
+
+test x"$1" = x"" && usage
+
+query="$1"
+url="http://subscene.com/subtitles/release"
+ep=$(getep "${url}")
+
+curl --get --data-urlencode "q=${query}" --data-urlencode "r=true" "${url}" | ./sub | grep -i 'LANG:en' | grep -i "${ep}" | grep -i "${name}"
(DIR) diff --git a/sub.c b/sub.c
@@ -0,0 +1,170 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "util.h"
+#include "xml.h"
+
+/* One result row; reset at each <tr> start tag and printed at its end tag. */
+struct sub {
+ int issub; /* set to 1 once a <td class="a1"> was seen in the row */
+ char title[256]; /* text in td "a1" while no span class is active */
+ char lang[256]; /* text in td "a1" inside a <span> that has a class */
+ int hi; /* set to 1 when td "a41" contains text (hearing impaired) */
+ int files; /* atoi() of the text in td "a3" */
+ char author[256]; /* text in td "a5" */
+ char authorurl[256]; /* href of an <a> in td "a5" */
+ char description[256]; /* text in td "a6" */
+ char url[256]; /* href of an <a> in td "a1" */
+};
+
+static XMLParser parser; /* XML parser state */
+static struct sub sub; /* row that is currently being collected */
+static char curclass[64]; /* class of the <td> being parsed, "" after </td> */
+static char spanclass[64]; /* class of a <span> inside td "a1", "" after </span> */
+
+/* Return non-zero when the tag names s1 and s2 are equal, ignoring case. */
+static int
+istag(const char *s1, const char *s2) {
+    return strcasecmp(s1, s2) == 0;
+}
+
+/* Return non-zero when the attribute names s1 and s2 are equal, ignoring case. */
+static int
+isattr(const char *s1, const char *s2) {
+    return strcasecmp(s1, s2) == 0;
+}
+
+/*
+ * Data handler: store the text found inside the current <td> in the
+ * field of the global "sub" that belongs to curclass.
+ * Leading and trailing whitespace is dropped and CR / LF are turned into
+ * spaces; text that is empty after trimming is ignored, as is any text
+ * seen while curclass is empty. Text that does not fit the local buffer
+ * or the target field is truncated.
+ */
+static void
+xml_handler_data(XMLParser *p, const char *data, size_t datalen) {
+    const char *s;
+    char buf[1024], *b;
+    size_t len;
+
+    (void)p;
+    (void)datalen;
+
+    if(!curclass[0])
+        return;
+
+    /* skip leading space; the <ctype.h> functions need an unsigned char
+     * value, passing a negative plain char (non-ASCII byte) is undefined */
+    for(s = data; *s && isspace((unsigned char)*s); s++)
+        ;
+    strlcpy(buf, s, sizeof(buf));
+    for(b = buf; *b; b++) {
+        if(*b == '\r' || *b == '\n')
+            *b = ' ';
+    }
+    /* trim remaining space */
+    for(len = strlen(buf); len > 0 && isspace((unsigned char)buf[len - 1]); len--)
+        buf[len - 1] = '\0';
+    if(!len)
+        return;
+
+    /* link */
+    if(strcmp(curclass, "a1") == 0) {
+        if(strcmp(spanclass, "") == 0)
+            strlcpy(sub.title, buf, sizeof(sub.title));
+        else
+            strlcpy(sub.lang, buf, sizeof(sub.lang));
+    }
+    /* files */
+    if(strcmp(curclass, "a3") == 0)
+        sub.files = atoi(buf);
+    /* hearing impaired? stays 0 (row reset on <tr>) unless "a41" has text */
+    if(strcmp(curclass, "a41") == 0)
+        sub.hi = 1;
+    /* author / user profile */
+    if(strcmp(curclass, "a5") == 0)
+        strlcpy(sub.author, buf, sizeof(sub.author));
+    /* description */
+    if(strcmp(curclass, "a6") == 0)
+        strlcpy(sub.description, buf, sizeof(sub.description));
+}
+
+/* Start-tag handler: a <tr> begins a new row, so wipe the collected one. */
+static void
+xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) {
+    (void)p;
+    (void)taglen;
+
+    if(!istag(tag, "tr"))
+        return;
+    memset(&sub, 0, sizeof(sub));
+}
+
+/*
+ * End-tag handler: print the collected row at </tr> when it was marked
+ * as a subtitle row; otherwise forget the current td class at </td> and
+ * the current span class at </span>.
+ */
+static void
+xml_handler_end_element(XMLParser *p, const char *tag, size_t taglen,
+    int isshort)
+{
+    (void)p;
+    (void)taglen;
+    (void)isshort;
+
+    if(istag(tag, "tr") && sub.issub == 1) {
+        printf("LANG:%s\tTITLE:%s\tURL:http://subscene.com%s\tHI:%d\tFILES:%d\tAUTHOR:%s\n",
+            sub.lang, sub.title, sub.url, sub.hi, sub.files, sub.author);
+        return;
+    }
+    if(istag(tag, "td"))
+        curclass[0] = '\0';
+    else if(istag(tag, "span"))
+        spanclass[0] = '\0';
+}
+
+/*
+ * Attribute handler:
+ *  - <td class=...>   remember the class in curclass; class "a1" marks
+ *                     the row as a subtitle row.
+ *  - <span class=...> inside td "a1": remember the class in spanclass.
+ *  - <a href=...>     inside td "a1" / "a5": store the subtitle url /
+ *                     the author profile url.
+ * Attribute names are compared ignoring case for all three tags.
+ */
+static void
+xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
+    const char *name, size_t namelen, const char *value, size_t valuelen)
+{
+    (void)p;
+    (void)taglen;
+    (void)namelen;
+    (void)valuelen;
+
+    if(istag(tag, "td")) {
+        if(isattr(name, "class")) {
+            strlcpy(curclass, value, sizeof(curclass));
+            /* link */
+            if(strcmp(value, "a1") == 0)
+                sub.issub = 1;
+        }
+    } else if(istag(tag, "span")) {
+        if(strcmp(curclass, "a1") == 0 && isattr(name, "class"))
+            strlcpy(spanclass, value, sizeof(spanclass));
+    } else if(istag(tag, "a")) {
+        /* subtitle / author profile url; use isattr() like the "class"
+         * checks above so that "HREF" is recognized as well */
+        if(isattr(name, "href")) {
+            if(strcmp(curclass, "a1") == 0)
+                strlcpy(sub.url, value, sizeof(sub.url));
+            if(strcmp(curclass, "a5") == 0)
+                strlcpy(sub.authorurl, value, sizeof(sub.authorurl));
+        }
+    }
+}
+
+/* Parse the document on stdin; the handlers print one line per subtitle row. */
+int
+main(void) {
+    xmlparser_init(&parser, stdin);
+
+    /* only these four handlers are used, the others stay unset */
+    parser.xmldata = xml_handler_data;
+    parser.xmlattr = xml_handler_attr;
+    parser.xmltagstart = xml_handler_start_element;
+    parser.xmltagend = xml_handler_end_element;
+
+    xmlparser_parse(&parser);
+
+    return EXIT_SUCCESS;
+}
(DIR) diff --git a/util.c b/util.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "util.h"
+
+/*
+ * Bounded string copy following the OpenBSD strlcpy() contract.
+ * Copies at most siz-1 bytes from src to dst and NUL-terminates dst
+ * whenever siz is not 0.
+ * Returns strlen(src); a return value >= siz means the copy was truncated.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz) {
+    size_t i = 0;
+
+    if (siz != 0) {
+        /* copy what fits, keeping one byte for the terminator */
+        for (; i < siz - 1 && src[i] != '\0'; i++)
+            dst[i] = src[i];
+        dst[i] = '\0';
+    }
+    /* walk the rest of src so that its full length can be returned */
+    while (src[i] != '\0')
+        i++;
+    return i;
+}
(DIR) diff --git a/util.h b/util.h
@@ -0,0 +1,2 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stddef.h> /* size_t, so the header can be included on its own */
+
+/* NOTE(review): presumably guards against a libc that provides strlcpy as
+ * a macro, so the version in util.c is the one declared here - confirm */
+#undef strlcpy
+/* bounded string copy, defined in util.c; returns strlen(src) */
+size_t strlcpy(char *dst, const char *src, size_t siz);
+
+#endif
(DIR) diff --git a/xml.c b/xml.c
@@ -0,0 +1,325 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "xml.h"
+
+/* Return the next byte read from x->fp, or EOF. This is currently a plain
+ * fgetc(); the buffered variant after the return is disabled with #if 0. */
+static __inline__ int /* like getc(), but do some smart buffering */
+xmlparser_getnext(XMLParser *x) {
+ return fgetc(x->fp);
+#if 0
+ if(x->readoffset >= x->readlastbytes) {
+ x->readoffset = 0;
+ if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf), x->fp)))
+ return EOF; /* 0 bytes read, assume EOF */
+ }
+ return (int)x->readbuf[x->readoffset++];
+#endif
+}
+
+/*
+ * Parse the attributes of the current tag (x->tag), reading until the
+ * closing '>' outside a quoted value, or until EOF.
+ * For each attribute the handlers xmlattrstart, xmlattr and xmlattrend are
+ * called when set; an attribute without a value is reported with "" as
+ * value. A value that does not fit in x->data is passed to xmlattr in
+ * several pieces. When xmlattrentity is set, "&...;" sequences inside a
+ * value are passed to it instead of xmlattr.
+ * A '/' outside a quoted value marks the tag as short (x->isshorttag).
+ */
+static __inline__ void
+xmlparser_parseattrs(XMLParser *x) {
+ size_t namelen = 0, valuelen;
+ int c, endsep, endname = 0;
+
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(isspace(c)) { /* TODO: simplify endname ? */
+ if(namelen)
+ endname = 1;
+ continue;
+ }
+ if(c == '?')
+ ; /* ignore */
+ else if(c == '=') {
+ x->name[namelen] = '\0';
+ } else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
+ /* attribute without value */
+ x->name[namelen] = '\0';
+ if(x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
+ if(x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ endname = 0;
+ /* c already starts the next name (or is '>' / '/', handled below) */
+ x->name[0] = c;
+ namelen = 1;
+ } else if(namelen && (c == '\'' || c == '"')) {
+ /* attribute with value */
+ endsep = c; /* c is end separator */
+ if(x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+ for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
+ if(c == '&' && x->xmlattrentity) { /* entities */
+ x->data[valuelen] = '\0';
+ /* call data function with data before entity if there is data */
+ if(valuelen && x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == endsep)
+ break;
+ if(valuelen < sizeof(x->data) - 1)
+ x->data[valuelen++] = c;
+ else {
+ /* TODO: entity too long? this should be very strange. */
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ valuelen = 0;
+ break;
+ }
+ if(c == ';') {
+ x->data[valuelen] = '\0';
+ x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ valuelen = 0;
+ break;
+ }
+ }
+ } else if(c != endsep) {
+ if(valuelen < sizeof(x->data) - 1) {
+ x->data[valuelen++] = c;
+ } else {
+ /* buffer full: hand over this piece and start a new one */
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ }
+ }
+ if(c == endsep) {
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ if(x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ break;
+ }
+ }
+ namelen = 0;
+ endname = 0;
+ } else if(namelen < sizeof(x->name) - 1)
+ x->name[namelen++] = c;
+ if(c == '>') {
+ break;
+ } else if(c == '/') {
+ x->isshorttag = 1;
+ namelen = 0;
+ x->name[0] = '\0';
+ }
+ }
+}
+
+/*
+ * Parse the body of a comment, reading until "-->" or EOF.
+ * xmlcommentstart is called first and xmlcommentend once "-->" was found
+ * (not on EOF); the text is passed to xmlcomment, in several pieces when
+ * it does not fit in x->data. Every handler is optional.
+ */
+static __inline__ void
+xmlparser_parsecomment(XMLParser *x) {
+    size_t datalen = 0;
+    size_t i = 0; /* number of consecutive '-' just read, capped at 2 */
+    int c;
+
+    if(x->xmlcommentstart)
+        x->xmlcommentstart(x);
+    while((c = xmlparser_getnext(x)) != EOF) {
+        if(c == '-') {
+            if(i < 2)
+                i++;
+        } else if(c == '>' && i == 2) { /* "-->" */
+            if(datalen >= 2) {
+                datalen -= 2; /* drop the "--" stored before the '>' */
+                x->data[datalen] = '\0';
+                if(x->xmlcomment)
+                    x->xmlcomment(x, x->data, datalen);
+            }
+            if(x->xmlcommentend)
+                x->xmlcommentend(x);
+            break;
+        } else {
+            /* any other character ends the run of dashes; without this
+             * reset two unrelated '-' followed by a later '>' ended the
+             * comment early and cut two characters of its text */
+            i = 0;
+        }
+        /* TODO: what if the end has --, and it's cut on the boundary, test this. */
+        if(datalen < sizeof(x->data) - 1) {
+            x->data[datalen++] = c;
+        } else {
+            /* buffer full: hand over this piece and start a new one */
+            x->data[datalen] = '\0';
+            if(x->xmlcomment)
+                x->xmlcomment(x, x->data, datalen);
+            x->data[0] = c;
+            datalen = 1;
+        }
+    }
+}
+
+/* TODO:
+ * <test><![CDATA[1234567dddd8]]]>
+ *
+ * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
+ * test comment function too for similar bug?
+ *
+ */
+/*
+ * Parse the body of a CDATA section, reading until "]]>" or EOF.
+ * xmlcdatastart is called first and xmlcdataend once "]]>" was found (not
+ * on EOF); the text is passed to xmlcdata, in several pieces when it does
+ * not fit in x->data. Every handler is optional.
+ */
+static __inline__ void
+xmlparser_parsecdata(XMLParser *x) {
+    size_t datalen = 0;
+    size_t i = 0; /* number of consecutive ']' just read, capped at 2 */
+    int c;
+
+    if(x->xmlcdatastart)
+        x->xmlcdatastart(x);
+    while((c = xmlparser_getnext(x)) != EOF) {
+        if(c == ']') {
+            if(i < 2)
+                i++;
+        } else if(c == '>' && i == 2) { /* "]]>" */
+            if(datalen >= 2) {
+                datalen -= 2; /* drop the "]]" stored before the '>' */
+                x->data[datalen] = '\0';
+                if(x->xmlcdata && datalen)
+                    x->xmlcdata(x, x->data, datalen);
+            }
+            if(x->xmlcdataend)
+                x->xmlcdataend(x);
+            break;
+        } else {
+            /* any other character ends the run of brackets; without this
+             * reset two unrelated ']' followed by a later '>' ended the
+             * section early and cut two characters of its text */
+            i = 0;
+        }
+        /* TODO: what if the end has ]>, and it's cut on the boundary */
+        if(datalen < sizeof(x->data) - 1) {
+            x->data[datalen++] = c;
+        } else {
+            /* buffer full: hand over this piece and start a new one */
+            x->data[datalen] = '\0';
+            if(x->xmlcdata)
+                x->xmlcdata(x, x->data, datalen);
+            x->data[0] = c;
+            datalen = 1;
+        }
+    }
+}
+
+/* Clear all handlers and internal state of x and make it read from fp. */
+void
+xmlparser_init(XMLParser *x, FILE *fp) {
+    memset(x, 0, sizeof(*x));
+    x->fp = fp;
+}
+
+/*
+ * Parse the stream x->fp until EOF and report what is found through the
+ * handlers set in x. Input before the first '<' is skipped.
+ * After a '<' the parser handles "<!" constructs (comments, CDATA) or a
+ * normal tag / processing instruction; everything else up to the next '<'
+ * is reported as data.
+ */
+void
+xmlparser_parse(XMLParser *x) {
+ int c, ispi;
+ size_t datalen, tagdatalen, taglen;
+
+ while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */
+
+ while(c != EOF) {
+ if(c == '<') { /* parse tag */
+ if((c = xmlparser_getnext(x)) == EOF)
+ return;
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ if(c == '!') { /* cdata and comments */
+ /* only the first characters after "<!" are kept in x->data, enough
+ * to recognize "--" and "[CDATA[" */
+ for(tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) {
+ if(tagdatalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */
+ x->data[tagdatalen++] = c; /* TODO: prevent overflow */
+ if(c == '>')
+ break;
+ else if(c == '-' && tagdatalen == strlen("--") &&
+ (x->data[0] == '-')) { /* comment */
+ xmlparser_parsecomment(x);
+ break;
+ } else if(c == '[') {
+ if(tagdatalen == strlen("[CDATA[") &&
+ x->data[1] == 'C' && x->data[2] == 'D' &&
+ x->data[3] == 'A' && x->data[4] == 'T' &&
+ x->data[5] == 'A' && x->data[6] == '[') { /* cdata */
+ xmlparser_parsecdata(x);
+ break;
+ #if 0
+ } else {
+ /* TODO ? */
+ /* markup declaration section */
+ while((c = xmlparser_getnext(x)) != EOF && c != ']');
+ #endif
+ }
+ }
+ }
+ } else { /* normal tag (open, short open, close), processing instruction. */
+ if(isspace(c))
+ while((c = xmlparser_getnext(x)) != EOF && isspace(c));
+ if(c == EOF)
+ return;
+ x->tag[0] = c;
+ ispi = (c == '?') ? 1 : 0;
+ x->isshorttag = ispi;
+ taglen = 1;
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '/') /* TODO: simplify short tag? */
+ x->isshorttag = 1; /* short tag */
+ else if(c == '>' || isspace(c)) {
+ x->tag[taglen] = '\0';
+ if(x->tag[0] == '/') { /* end tag, starts with </ */
+ x->taglen = --taglen; /* len -1 because of / */
+ if(taglen && x->xmltagend)
+ x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
+ } else {
+ x->taglen = taglen;
+ if(x->xmltagstart)
+ x->xmltagstart(x, x->tag, x->taglen); /* start tag */
+ if(isspace(c))
+ xmlparser_parseattrs(x);
+ if(x->xmltagstartparsed)
+ x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
+ }
+ if((x->isshorttag || ispi) && x->xmltagend) /* call tagend for shortform or processing instruction */
+ x->xmltagend(x, x->tag, x->taglen, 1);
+ break;
+ } else if(taglen < sizeof(x->tag) - 1)
+ x->tag[taglen++] = c;
+ }
+ }
+ } else {
+ /* parse data */
+ datalen = 0;
+ if(x->xmldatastart)
+ x->xmldatastart(x);
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '&' && x->xmldataentity) {
+ /* NOTE(review): xmldata is called here without a NULL check, unlike
+ * the other call sites below */
+ if(datalen) {
+ x->data[datalen] = '\0';
+ x->xmldata(x, x->data, datalen);
+ }
+ x->data[0] = c;
+ datalen = 1;
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '<')
+ break;
+ if(datalen < sizeof(x->data) - 1)
+ x->data[datalen++] = c;
+ if(isspace(c))
+ break;
+ else if(c == ';') {
+ x->data[datalen] = '\0';
+ x->xmldataentity(x, x->data, datalen);
+ datalen = 0;
+ break;
+ }
+ }
+ } else if(c != '<') {
+ if(datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ /* buffer full: hand over this piece and start a new one */
+ x->data[datalen] = '\0';
+ if(x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+ if(c == '<') {
+ x->data[datalen] = '\0';
+ if(x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen);
+ if(x->xmldataend)
+ x->xmldataend(x);
+ break;
+ }
+ }
+ }
+ }
+}
(DIR) diff --git a/xml.h b/xml.h
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/*
+ * Parser context: the callbacks the parser invokes, the input stream and
+ * the parser's internal buffers. xmlparser_init() zeroes the whole
+ * struct, so the handlers have to be assigned after calling it.
+ */
+typedef struct xmlparser {
+ /* handlers */
+ void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t taglen);
+ void (*xmltagstartparsed)(struct xmlparser *p, const char *tag,
+ size_t taglen, int isshort);
+ void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen,
+ int isshort);
+ void (*xmldatastart)(struct xmlparser *p);
+ void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen);
+ void (*xmldataend)(struct xmlparser *p);
+ void (*xmldataentity)(struct xmlparser *p, const char *data,
+ size_t datalen);
+ void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t taglen,
+ const char *name, size_t namelen);
+ void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen,
+ const char *name, size_t namelen, const char *value,
+ size_t valuelen);
+ void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen,
+ const char *name, size_t namelen);
+ void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t taglen,
+ const char *name, size_t namelen, const char *value,
+ size_t valuelen);
+ void (*xmlcdatastart)(struct xmlparser *p);
+ void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen);
+ void (*xmlcdataend)(struct xmlparser *p);
+ void (*xmlcommentstart)(struct xmlparser *p);
+ void (*xmlcomment)(struct xmlparser *p, const char *comment,
+ size_t commentlen);
+ void (*xmlcommentend)(struct xmlparser *p);
+
+ FILE *fp; /* file stream to read from */
+
+ /* private; internal state */
+ char tag[1024]; /* current tag */
+ int isshorttag; /* current tag is in short form ? */
+ size_t taglen;
+ char name[256]; /* current attribute name */
+ char data[BUFSIZ]; /* data buffer used for tag and attribute data */
+ size_t readoffset;
+ size_t readlastbytes;
+ unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnext */
+} XMLParser;
+
+/* zero the parser context x and set fp as its input stream */
+void xmlparser_init(XMLParser *x, FILE *fp);
+/* parse the input stream of x until EOF, calling the handlers that are set */
+void xmlparser_parse(XMLParser *x);