/*
 * MIME mail decoding.
 *
 * This module contains decoding routines for converting
 * quoted-printable data into pure 8-bit data, in MIME
 * formatted messages.
 *
 * By Henrik Storner <storner@image.dk>
 *
 * Configuration file support for fetchmail 4.3.8 by 
 * Frank Damgaard <frda@post3.tele.dk>
 * 
 * For license terms, see the file COPYING in this directory.
 */

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <iconv.h>
#include "unmime.h"

/*
 * Translate one hexadecimal digit character into its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and 16 for any
 * other byte (16 serves as the "not a hex digit" marker).
 */
static unsigned char unhex(unsigned char c)
{
  /* Index in this table is the digit value; the lowercase run repeats 10..15 */
  static const char digits[] = "0123456789ABCDEFabcdef";
  const char *hit = memchr(digits, c, sizeof(digits) - 1);
  size_t pos;

  if (hit == NULL)
    return 16;	/* invalid hex character */

  pos = (size_t)(hit - digits);
  if (pos >= 16)
    pos -= 6;	/* fold 'a'..'f' onto 10..15 */

  return (unsigned char)pos;
}

/*
 * Combine two hexadecimal digit characters into the byte they encode.
 *
 *   c1    : character holding the high nibble
 *   c2    : character holding the low nibble
 *   c_out : receives the decoded byte; written only on success
 *
 * Returns 0 on success, 1 if either character is not a hex digit.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
  const unsigned char hi = unhex(c1);
  const unsigned char lo = unhex(c2);

  /* unhex() reports a non-hex character as a value above 15 */
  if ((hi > 15) || (lo > 15))
    return 1;

  *c_out = (unsigned char)((hi << 4) | lo);
  return 0;
}


/*
 * Routines to decode MIME encoded-word headers (both the "Q"
 * quoted-printable and the "B" base64 encodings), as per RFC 2047.
 */

/* States of the decoding state machine */
#define S_COPY_PLAIN        0	/* Just copy, but watch for the QP flag */
#define S_SKIP_MIMEINIT     1	/* Get the encoding, and skip header */
#define S_COPY_MIME         2	/* Decode a sequence of coded characters */

static const char MIMEHDR_INIT[]  = "=?";	/* Start of coded sequence */
static const char MIMEHDR_END[]   = "?=";	/* End of coded sequence */

/*
 * Return non-zero if the charset name is "utf-8", compared without
 * regard to case (RFC 2047 declares charset names case-independent).
 */
static int charset_is_utf8(const char *name)
{
  static const char utf8[] = "utf-8";
  size_t i;

  for (i = 0; utf8[i] != '\0'; i++) {
    /* A shorter name reaches its NUL here and fails the comparison */
    if (tolower((unsigned char)name[i]) != utf8[i])
      return 0;
  }
  return (name[i] == '\0');
}

/*
 * Decode, in place, a header containing RFC 2047 encoded words
 * ("=?charset?encoding?text?=").
 *
 *   hdr : NUL-terminated header text; overwritten with the decoded
 *         result, which is never longer than the original text.
 *
 * Both the "Q" (quoted-printable) and "B" (base64) transfer encodings
 * are handled; text in any other encoding is copied through with the
 * delimiters removed.  Whitespace between two adjacent encoded words
 * is discarded.  A "=?" that does not introduce a well-formed
 * "charset?encoding?" prefix is kept literally.
 *
 * If the charset of the last well-formed encoded word is UTF-8, the
 * whole decoded header is afterwards run through iconv() using the
 * conversion descriptor `icd`.
 * NOTE(review): `icd` is not declared in this function; it is assumed
 * to be an iconv_t opened elsewhere (e.g. via unmime.h) - confirm.
 * If the conversion fails, is incomplete, or would not fit in the
 * space the original header occupied, the decoded but unconverted
 * text is left in hdr.
 */
void UnMimeHeader(unsigned char *hdr)
{
  /* Note: Decoding is done "in-situ", i.e. without using an
   * additional buffer for temp. storage. This is possible, since the
   * decoded string will always be shorter than the encoded string,
   * due to the encoding scheme.
   */

  int  state = S_COPY_PLAIN;
  unsigned char *p_in, *p_out, *p;
  unsigned char *pending;		/* input consumed but not yet emitted */
  unsigned char enc = '\0';		/* initialization pacifies -Wall */
  size_t n;
  size_t orig_len;
  char charset[128] = "";		/* empty until a valid word is seen */

  /* Speed up in case this is not a MIME-encoded header */
  if (strstr((char *)hdr, MIMEHDR_INIT) == NULL)
    return;   /* No MIME header */

  /* The caller's buffer is only known to hold this many characters */
  orig_len = strlen((char *)hdr);

  /* Loop through the buffer.
   *  p_in    : Next char to be processed.
   *  p_out   : Where to put the next processed char (p_out <= p_in).
   *  pending : Start of the raw text to restore if the "=?" just
   *            consumed turns out not to start a valid encoded word.
   *  enc     : Encoding used (usually, 'q' = quoted-printable)
   */
  for (p_out = p_in = pending = hdr; (*p_in); ) {
    switch (state) {
    case S_COPY_PLAIN:
      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
      if (p == NULL) {
        /*
         * No more coded data in buffer,
         * just move remainder into place.
         */
        n = strlen((char *)p_in);   /* How much left */
        memmove(p_out, p_in, n);
        p_in += n; p_out += n;
      }
      else {
        /* MIME header init found at location p */
        if (p > p_in) {
          /* There are some uncoded chars at the beginning. */
          n = (size_t)(p - p_in);
          memmove(p_out, p_in, n);
          p_out += n;
        }
        pending = p;
        p_in = p + (sizeof(MIMEHDR_INIT) - 1);
        state = S_SKIP_MIMEINIT;
      }
      break;

    case S_SKIP_MIMEINIT:
      /* Mime type definition: "charset?encoding?" */
      p = (unsigned char *)strchr((char *)p_in, '?');

      /*
       * p[1] is the transfer encoding and p[2] must be a '?'.
       * p[1] is checked for NUL first so that p[2] is never read
       * beyond the end of the string.
       */
      if ((p != NULL) && (p[1] != '\0') && (p[2] == '?')) {
        /* p_in .. (p-1) holds the charset; truncate to fit the buffer */
        n = (size_t)(p - p_in);
        if (n >= sizeof(charset))
          n = sizeof(charset) - 1;
        memcpy(charset, p_in, n);
        charset[n] = '\0';

        enc = (unsigned char)tolower(p[1]);
        p_in = p + 3;
        state = S_COPY_MIME;
      }
      else {
        /*
         * Not a well-formed encoded word: emit the text that was
         * provisionally skipped (the "=?" and any whitespace in
         * front of it) and carry on copying plain text.
         */
        n = (size_t)(p_in - pending);
        memmove(p_out, pending, n);
        p_out += n;
        state = S_COPY_PLAIN;
      }
      break;

    case S_COPY_MIME:
      /* Find end of coded data; without one, decode to end of string */
      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_END);
      if (p == NULL)
        p = p_in + strlen((char *)p_in);

      while (p_in < p) {
        /* Decode all encoded data */
        if (enc == 'q') {
          if ((*p_in == '=') && ((p - p_in) >= 3) &&
              (qp_char(p_in[1], p_in[2], p_out) == 0)) {
            /* One char qp-coded at (p_in+1) and (p_in+2) was decoded */
            p_in += 3;
          }
          else if (*p_in == '_') {
            /*
             * RFC 2047: '_' inside encoded word represents 0x20.
             * NOT a space - always the value 0x20.
             */
            *p_out = 0x20;
            p_in++;
          }
          else {
            /* Ordinary char, or invalid QP data: pass through unchanged */
            *p_out = *p_in;
            p_in++;
          }
          p_out++;
        }
        else if (enc == 'b') {
          /*
           * Decode base64 encoded data.  The byte at p is temporarily
           * replaced by '\r' so that from64tobits() stops there.
           */
          unsigned char delimsave;
          int decoded_count;

          delimsave = *p; *p = '\r';
          decoded_count = from64tobits(p_out, p_in, 0);
          *p = delimsave;
          if (decoded_count > 0)
            p_out += decoded_count;
          p_in = p;
        }
        else {
          /* Unknown encoding: copy unchanged */
          *p_out = *p_in;
          p_in++;
          p_out++;
        }
      }
      if (*p_in)
        p_in += sizeof(MIMEHDR_END) - 1;   /* Skip the MIMEHDR_END delimiter */

      /*
       * We've completed decoding one encoded sequence. But another
       * may follow immediately, in which case whitespace before the
       * new MIMEHDR_INIT delimiter must be discarded.
       * See if that is the case
       */
      state = S_COPY_PLAIN;
      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
      if (p != NULL) {
        /*
         * There is more MIME data later on. Is there
         * whitespace only before the delimiter?
         */
        unsigned char *q = p_in;

        while ((q < p) && isspace(*q))
          q++;

        if (q == p) {
          /*
           * Whitespace-only before the MIME delimiter. OK,
           * just advance p_in to past the new MIMEHDR_INIT,
           * and prepare to process the new MIME charset/encoding
           * header.  Remember where the skipped text began in case
           * that header proves invalid.
           */
          pending = p_in;
          p_in = p + (sizeof(MIMEHDR_INIT) - 1);
          state = S_SKIP_MIMEINIT;
        }
      }
      break;
    }
  }

  /* Input ended right after a "=?": keep the skipped text literally */
  if (state == S_SKIP_MIMEINIT) {
    n = (size_t)(p_in - pending);
    memmove(p_out, pending, n);
    p_out += n;
  }

  *p_out = '\0';

  if (charset_is_utf8(charset)) {
    char obuf[1024], *ip, *op;
    size_t ileft, oleft, rc, outlen;

    ip = (char *)hdr; op = obuf;
    ileft = strlen(ip); oleft = sizeof(obuf) - 1;

    /*
     * The input argument is "char **" in POSIX but "const char **"
     * in some older iconv implementations; passing it as void *
     * is accepted by both prototypes.
     */
    rc = iconv(icd, (void *)&ip, &ileft, &op, &oleft);
    if ((rc == (size_t)-1) || (ileft != 0))
      return;   /* failed or incomplete: keep the unconverted text */

    /* Never write more than the original header is known to hold */
    outlen = (size_t)(op - obuf);
    if (outlen > orig_len)
      return;

    memcpy(hdr, obuf, outlen);
    hdr[outlen] = '\0';
  }
}

/*
 * Decode a MIME-encoded header in place with UnMimeHeader(), then
 * flatten it for single-line use: every soft hyphen byte (0xAD),
 * carriage return and newline in the result is replaced by a space.
 *
 *   s : NUL-terminated header text, modified in place.
 */
void unmime_header(unsigned char *s)
{
    unsigned char *cur;

    UnMimeHeader(s);

    /* One pass over the decoded text handles all three substitutions */
    for (cur = s; *cur != '\0'; cur++) {
	switch (*cur) {
	case 0xAD:	/* soft hyphen */
	case '\r':
	case '\n':
	    *cur = ' ';
	    break;
	default:
	    break;
	}
    }
}
.