/* * MIME mail decoding. * * This module contains decoding routines for converting * quoted-printable data into pure 8-bit data, in MIME * formatted messages. * * By Henrik Storner * * Configuration file support for fetchmail 4.3.8 by * Frank Damgaard * * For license terms, see the file COPYING in this directory. */ #include #include #include #include #include #include "unmime.h" static unsigned char unhex(unsigned char c) { if ((c >= '0') && (c <= '9')) return (c - '0'); else if ((c >= 'A') && (c <= 'F')) return (c - 'A' + 10); else if ((c >= 'a') && (c <= 'f')) return (c - 'a' + 10); else return 16; /* invalid hex character */ } static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out) { c1 = unhex(c1); c2 = unhex(c2); if ((c1 > 15) || (c2 > 15)) return 1; else { *c_out = 16*c1+c2; return 0; } } /* * Routines to decode MIME QP-encoded headers, as per RFC 2047. */ /* States of the decoding state machine */ #define S_COPY_PLAIN 0 /* Just copy, but watch for the QP flag */ #define S_SKIP_MIMEINIT 1 /* Get the encoding, and skip header */ #define S_COPY_MIME 2 /* Decode a sequence of coded characters */ static const char MIMEHDR_INIT[] = "=?"; /* Start of coded sequence */ static const char MIMEHDR_END[] = "?="; /* End of coded sequence */ void UnMimeHeader(unsigned char *hdr) { /* Decode a buffer containing data encoded according to RFC * 2047. This only handles content-transfer-encoding; conversion * between character sets is not implemented. In other words: We * assume the charsets used can be displayed by your mail program * without problems. */ /* Note: Decoding is done "in-situ", i.e. without using an * additional buffer for temp. storage. This is possible, since the * decoded string will always be shorter than the encoded string, * due to the encoding scheme. */ int state = S_COPY_PLAIN; unsigned char *p_in, *p_out, *p; unsigned char enc = '\0'; /* initialization pacifies -Wall */ int i; char charset[128]; /* Speed up in case this is not a MIME-encoded header */ p = strstr(hdr, MIMEHDR_INIT); if (p == NULL) return; /* No MIME header */ /* Loop through the buffer. * p_in : Next char to be processed. * p_out: Where to put the next processed char * enc : Encoding used (usually, 'q' = quoted-printable) */ for (p_out = p_in = hdr; (*p_in); ) { switch (state) { case S_COPY_PLAIN: p = strstr(p_in, MIMEHDR_INIT); if (p == NULL) { /* * No more coded data in buffer, * just move remainder into place. */ i = strlen(p_in); /* How much left */ memmove(p_out, p_in, i); p_in += i; p_out += i; } else { /* MIME header init found at location p */ if (p > p_in) { /* There are some uncoded chars at the beginning. */ i = (p - p_in); memmove(p_out, p_in, i); p_out += i; } p_in = (p + 2); state = S_SKIP_MIMEINIT; } break; case S_SKIP_MIMEINIT: /* Mime type definition: "charset?encoding?" */ p = strchr(p_in, '?'); if (p != NULL) { /* p_in .. (p-1) holds the charset */ strncpy(charset, p_in, p - p_in); charset[p - p_in] = '\0'; /* *(p+1) is the transfer encoding, *(p+2) must be a '?' */ if (*(p+2) == '?') { enc = tolower(*(p+1)); p_in = p+3; state = S_COPY_MIME; } else state = S_COPY_PLAIN; } else state = S_COPY_PLAIN; /* Invalid data */ break; case S_COPY_MIME: p = strstr(p_in, MIMEHDR_END); /* Find end of coded data */ if (p == NULL) p = p_in + strlen(p_in); for (; (p_in < p); ) { /* Decode all encoded data */ if (enc == 'q') { if (*p_in == '=') { /* Decode one char qp-coded at (p_in+1) and (p_in+2) */ if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0) p_in += 3; else { /* Invalid QP data - pass through unchanged. */ *p_out = *p_in; p_in++; } } else if (*p_in == '_') { /* * RFC 2047: '_' inside encoded word represents 0x20. * NOT a space - always the value 0x20. */ *p_out = 0x20; p_in++; } else { /* Copy unchanged */ *p_out = *p_in; p_in++; } p_out++; } else if (enc == 'b') { /* Decode base64 encoded data */ char delimsave; int decoded_count; delimsave = *p; *p = '\r'; decoded_count = from64tobits(p_out, p_in, 0); *p = delimsave; if (decoded_count > 0) p_out += decoded_count; p_in = p; } else { /* Copy unchanged */ *p_out = *p_in; p_in++; p_out++; } } if (*p_in) p_in += 2; /* Skip the MIMEHDR_END delimiter */ /* * We've completed decoding one encoded sequence. But another * may follow immediately, in which case whitespace before the * new MIMEHDR_INIT delimiter must be discarded. * See if that is the case */ p = strstr(p_in, MIMEHDR_INIT); state = S_COPY_PLAIN; if (p != NULL) { /* * There is more MIME data later on. Is there * whitespace only before the delimiter? */ unsigned char *q; int wsp_only = 1; for (q=p_in; (wsp_only && (q < p)); q++) wsp_only = isspace(*q); if (wsp_only) { /* * Whitespace-only before the MIME delimiter. OK, * just advance p_in to past the new MIMEHDR_INIT, * and prepare to process the new MIME charset/encoding * header. */ p_in = p + sizeof(MIMEHDR_INIT) - 1; state = S_SKIP_MIMEINIT; } } break; } } *p_out = '\0'; if ((! strcmp(charset, "UTF-8")) || (! strcmp(charset, "utf-8"))) { char obuf[1024], *ip, *op; size_t ileft, oleft; ip = hdr; op = obuf; ileft = strlen(ip); oleft = 1023; iconv(icd, (const char **)&ip, &ileft, (char **)&op, &oleft); if (ileft) return; *op = '\0'; strcpy(hdr, obuf); } } void unmime_header(unsigned char *s) { unsigned char *p; UnMimeHeader(s); /* Convert soft hyphens to spaces */ for (p = strchr(s, '\xAD'); p; p = strchr(p, '\xAD')) *p = ' '; /* Convert carriage returns and newlines to spaces */ for (p = strchr(s, '\r'); p; p = strchr(p, '\r')) *p = ' '; for (p = strchr(s, '\n'); p; p = strchr(p, '\n')) *p = ' '; } .