tImproved parsing of HTML headers and handling of redirects - vaccinewars - be a doctor and try to vaccinate the world
(HTM) git clone git://src.adamsgaard.dk/vaccinewars
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 6b906e129d9e1c2182b34824d49bae6466d2f3c7
(DIR) parent e47416a9a52ecccf7a41c4dd0a96a7dfff5b10a6
(HTM) Author: Ben Webb <ben@salilab.org>
Date: Tue, 2 Oct 2001 21:44:55 +0000
Improved parsing of HTML headers and handling of redirects
Diffstat:
M src/network.c | 103 ++++++++++++++++++++++++-------
M src/network.h | 4 +++-
2 files changed, 82 insertions(+), 25 deletions(-)
---
(DIR) diff --git a/src/network.c b/src/network.c
t@@ -442,13 +442,8 @@ static void SendHttpRequest(HttpConnection *conn) {
text=g_string_new("");
- if (conn->Redirect) {
- g_string_sprintf(text,"%s %s HTTP/1.0",conn->Method,conn->Redirect);
- g_free(conn->Redirect); conn->Redirect=NULL;
- } else {
- g_string_sprintf(text,"%s http://%s:%u%s HTTP/1.0",
- conn->Method,conn->HostName,conn->Port,conn->Query);
- }
+ g_string_sprintf(text,"%s http://%s:%u%s HTTP/1.0",
+ conn->Method,conn->HostName,conn->Port,conn->Query);
QueueMessageForSend(&conn->NetBuf,text->str);
if (conn->Headers) QueueMessageForSend(&conn->NetBuf,conn->Headers);
t@@ -482,8 +477,8 @@ static gboolean StartHttpConnect(HttpConnection *conn) {
gboolean OpenHttpConnection(HttpConnection **connpt,gchar *HostName,
unsigned Port,gchar *Proxy,unsigned ProxyPort,
- gchar *Method,gchar *Query,gchar *Headers,
- gchar *Body) {
+ gchar *Method,gchar *Query,
+ gchar *Headers,gchar *Body) {
HttpConnection *conn;
g_assert(HostName && Method && Query && connpt);
t@@ -515,7 +510,8 @@ void CloseHttpConnection(HttpConnection *conn) {
g_free(conn->Query);
g_free(conn->Headers);
g_free(conn->Body);
- g_free(conn->Redirect);
+ g_free(conn->RedirHost);
+ g_free(conn->RedirQuery);
g_free(conn);
}
t@@ -523,6 +519,65 @@ gboolean IsHttpError(HttpConnection *conn) {
return IsError(&conn->NetBuf.error);
}
+/*
+ * Splits an absolute "http://host[:port][/path]" URI into its components.
+ *
+ * uri:   the URI to parse; surrounding whitespace is stripped in place,
+ *        so the caller's buffer may be modified.
+ * host:  on success, set to a newly-allocated copy of the hostname.
+ * port:  on success, set to the port given in the URI, or 80 if none.
+ * query: on success, set to a newly-allocated copy of the path (starting
+ *        at the first '/'), or "/" if the URI has no path.
+ *
+ * Returns TRUE on success, in which case the caller must g_free() *host
+ * and *query. Returns FALSE (leaving all outputs untouched) if the URI
+ * is not an http:// URI, has no hostname, or has an invalid port.
+ */
+static gboolean ParseHtmlLocation(gchar *uri,gchar **host,unsigned *port,
+                                  gchar **query) {
+  gchar *uris,*colon,*slash,*portend;
+  unsigned long portnum;
+
+  if (!uri) return FALSE;
+  uris = g_strstrip(uri);
+  if (strlen(uris)<7 ||
+      g_strncasecmp(uris,"http://",7)!=0) return FALSE;
+
+  uris+=7;   /* skip to hostname */
+  if (*uris=='\0') return FALSE;   /* No hostname */
+
+/* '/' denotes the start of the path of the HTML file */
+  slash = strchr(uris,'/');
+  if (slash==uris) return FALSE;   /* No hostname */
+
+/* ':' denotes the port to connect to, but only if it precedes the path */
+  colon = strchr(uris,':');
+  if (colon==uris) return FALSE;   /* No hostname */
+  if (colon && slash && colon>slash) colon=NULL;
+
+  if (colon) {
+/* Parse the port without modifying the string; the number must run right
+   up to the path separator (or the end of the URI) and be a valid port */
+    portnum = strtoul(colon+1,&portend,10);
+    if (portend==colon+1) return FALSE;   /* No digits */
+    if (*portend!='\0' && portend!=slash) return FALSE;   /* Trailing junk */
+    if (portnum==0 || portnum>65535) return FALSE;   /* Invalid port */
+    *port = (unsigned)portnum;
+    *host = g_strndup(uris,colon-uris);
+  } else {
+    *port = 80;
+    *host = slash ? g_strndup(uris,slash-uris) : g_strdup(uris);
+  }
+
+  *query = g_strdup(slash ? slash : "/");
+  return TRUE;
+}
+
+/*
+ * Examines a single header line from an HTTP response. If the response
+ * status is 301 or 302 and the line is a "Location:" header carrying a
+ * parseable http:// URI, the redirect target (host, port and query) is
+ * stored in conn, replacing any previously stored target.
+ */
+static void ParseHtmlHeader(gchar *line,HttpConnection *conn) {
+  gchar **fields,*newhost,*newquery;
+  unsigned newport;
+  gboolean isredirect;
+
+/* Separate the header name from its value at the first space.
+   NOTE(review): this relies on a max_tokens of 1 producing two strings;
+   confirm against the GLib version in use. */
+  fields=g_strsplit(line," ",1);
+  isredirect = (conn->StatusCode==301 || conn->StatusCode==302);
+
+  if (fields[0] && fields[1] && isredirect &&
+      g_strcasecmp(fields[0],"Location:")==0) {
+    if (!ParseHtmlLocation(fields[1],&newhost,&newport,&newquery)) {
+      g_print("FIXME: Bad redirect\n");
+    } else {
+      g_print("Redirect to %s:%u%s\n",newhost,newport,newquery);
+/* Discard any earlier redirect target before taking ownership of
+   the newly-allocated strings */
+      g_free(conn->RedirHost);
+      g_free(conn->RedirQuery);
+      conn->RedirHost=newhost;
+      conn->RedirPort=newport;
+      conn->RedirQuery=newquery;
+    }
+  }
+  g_strfreev(fields);
+}
+
gchar *ReadHttpResponse(HttpConnection *conn) {
gchar *msg,**split;
t@@ -539,18 +594,7 @@ gchar *ReadHttpResponse(HttpConnection *conn) {
break;
case HS_READHEADERS:
if (msg[0]==0) conn->Status=HS_READSEPARATOR;
- else {
- split=g_strsplit(msg," ",1);
- if (split[0] && split[1]) {
- if (conn->StatusCode==302 && strcmp(split[0],"Location:")==0) {
- g_print("Redirect to %s\n",split[1]);
- g_free(conn->Redirect);
- conn->Redirect = g_strdup(split[1]);
- }
-/* g_print("Header %s (value %s) read\n",split[0],split[1]);*/
- }
- g_strfreev(split);
- }
+ else ParseHtmlHeader(msg,conn);
break;
case HS_READSEPARATOR:
conn->Status=HS_READBODY;
t@@ -564,11 +608,22 @@ gchar *ReadHttpResponse(HttpConnection *conn) {
gboolean HandleHttpCompletion(HttpConnection *conn) {
NBCallBack CallBack;
gpointer CallBackData;
- if (conn->Redirect) {
- g_print("Following redirect\n");
+ if (conn->RedirHost) {
+ g_print("Following redirect to %s\n",conn->RedirHost);
CallBack=conn->NetBuf.CallBack;
CallBackData=conn->NetBuf.CallBackData;
ShutdownNetworkBuffer(&conn->NetBuf);
+ g_free(conn->HostName); g_free(conn->Query);
+ conn->HostName = conn->RedirHost;
+ conn->Query = conn->RedirQuery;
+ conn->Port = conn->RedirPort;
+ conn->RedirHost = conn->RedirQuery = NULL;
+
+ if (conn->Tries>=5) {
+ g_print("FIXME: Number of tries exceeded\n");
+ return TRUE;
+ }
+
if (StartHttpConnect(conn)) {
SendHttpRequest(conn);
SetNetworkBufferCallBack(&conn->NetBuf,CallBack,CallBackData);
(DIR) diff --git a/src/network.h b/src/network.h
t@@ -95,7 +95,9 @@ typedef struct _HttpConnection {
gchar *Query; /* e.g. the path of the desired webpage */
gchar *Headers; /* if non-NULL, e.g. Content-Type */
gchar *Body; /* if non-NULL, data to send */
- gchar *Redirect; /* if non-NULL, a URL to redirect to */
+ gchar *RedirHost; /* if non-NULL, a hostname to redirect to */
+ gchar *RedirQuery; /* if non-NULL, the path to redirect to */
+ unsigned RedirPort; /* The port on the host to redirect to */
NetworkBuffer NetBuf; /* The actual network connection itself */
gint Tries; /* Number of requests actually sent so far */
gint StatusCode; /* 0=no status yet, otherwise an HTTP status code */