📄 unmime.c

📁 用于对收发邮件编码进行解码的类文件,功能简洁
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * MIME mail decoding.
 *
 * This module contains decoding routines for converting
 * quoted-printable data into pure 8-bit data, in MIME
 * formatted messages.
 *
 * By Henrik Storner <storner@image.dk>
 *
 * Configuration file support for fetchmail 4.3.8 by 
 * Frank Damgaard <frda@post3.tele.dk>
 * 
 */

#include "config.h"
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "fetchmail.h"

/* Value returned by unhex() for a character that is not a hex digit. */
#define UNHEX_INVALID 0xFF

/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'.  Any other character
 * yields UNHEX_INVALID, which is greater than 15 so callers can detect
 * bad input with a simple range check.
 *
 * The sentinel is a fixed value rather than the input byte itself:
 * bytes 0..15 (NUL, TAB, LF, CR, ...) would otherwise be
 * indistinguishable from a successfully converted digit.
 */
static unsigned char unhex(unsigned char c)
{
  if ((c >= '0') && (c <= '9'))
    return (unsigned char)(c - '0');
  if ((c >= 'A') && (c <= 'F'))
    return (unsigned char)(c - 'A' + 10);
  if ((c >= 'a') && (c <= 'f'))
    return (unsigned char)(c - 'a' + 10);

  return UNHEX_INVALID;
}

/*
 * Decode one quoted-printable escape from its two hex digit characters.
 *
 *   c1, c2 : the two characters following the '=' sign
 *   c_out  : receives the decoded byte on success; untouched on failure
 *
 * Returns 0 when both characters convert to a value in 0..15 and the
 * byte was stored, 1 otherwise.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
  const unsigned char hi = unhex(c1);
  const unsigned char lo = unhex(c2);

  /* unhex() signals "not a hex digit" with a value above 15 */
  if ((hi > 15) || (lo > 15))
    return 1;

  *c_out = (unsigned char)((hi << 4) | lo);
  return 0;
}



/*
 * Routines to decode MIME QP-encoded headers, as per RFC 2047.
 */

/* States of the decoding state machine */
#define S_COPY_PLAIN        0	/* Just copy, but watch for the QP flag */
#define S_SKIP_MIMEINIT     1	/* Get the encoding, and skip header */
#define S_COPY_MIME         2	/* Decode a sequence of coded characters */

static const char MIMEHDR_INIT[]  = "=?";	/* Start of coded sequence */
static const char MIMEHDR_END[]   = "?=";	/* End of coded sequence */

/*
 * Decode a NUL-terminated header buffer containing data encoded
 * according to RFC 2047 ("=?charset?encoding?text?=").
 *
 *   hdr : header text; rewritten in place.  A NULL pointer is ignored.
 *
 * Only the content-transfer-encoding is handled ('q' = quoted-printable,
 * 'b' = base64); conversion between character sets is not implemented.
 * In other words: we assume the charsets used can be displayed by the
 * mail program without problems.  Text using any other encoding letter
 * is copied through undecoded, with the surrounding markers removed.
 *
 * Decoding is done "in situ", i.e. without an additional buffer for
 * temporary storage.  This is possible because the decoded string is
 * never longer than the encoded string, so the write position never
 * overtakes the read position.
 *
 * NOTE(review): when a "=?" marker is not followed by a well-formed
 * "charset?encoding?" prefix, the two marker characters are dropped
 * from the output.  That long-standing behaviour is kept as it is.
 */
void UnMimeHeader(unsigned char *hdr)
{
  int  state = S_COPY_PLAIN;
  unsigned char *p_in, *p_out, *p;
  unsigned char enc = '\0';		/* initialization pacifies -Wall */
  size_t len;

  if (hdr == NULL)
    return;

  /* Speed up in case this is not a MIME-encoded header */
  if (strstr((char *)hdr, MIMEHDR_INIT) == NULL)
    return;   /* No MIME header */

  /* Loop through the buffer.
   *  p_in : Next char to be processed.
   *  p_out: Where to put the next processed char
   *  enc  : Encoding used (usually, 'q' = quoted-printable)
   */
  for (p_out = p_in = hdr; (*p_in); ) {
    switch (state) {
    case S_COPY_PLAIN:
      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
      if (p == NULL) {
        /*
         * No more coded data in buffer,
         * just move remainder into place.
         */
        len = strlen((char *)p_in);   /* How much left */
        memmove(p_out, p_in, len);
        p_in += len;
        p_out += len;
      }
      else {
        /* MIME header init found at location p */
        if (p > p_in) {
          /* There are some uncoded chars at the beginning. */
          len = (size_t)(p - p_in);
          memmove(p_out, p_in, len);
          p_out += len;
        }
        p_in = p + strlen(MIMEHDR_INIT);
        state = S_SKIP_MIMEINIT;
      }
      break;

    case S_SKIP_MIMEINIT:
      /* Mime type definition: "charset?encoding?" */
      p = (unsigned char *)strchr((char *)p_in, '?');

      /*
       * p_in .. (p-1) holds the charset, p[1] is the transfer
       * encoding and p[2] must be a '?'.  p[1] is tested for the
       * terminator first, so that p[2] is never read beyond the
       * end of the string.
       */
      if ((p != NULL) && (p[1] != '\0') && (p[2] == '?')) {
        enc = (unsigned char)tolower(p[1]);
        p_in = p + 3;
        state = S_COPY_MIME;
      }
      else
        state = S_COPY_PLAIN;   /* Invalid data */
      break;

    case S_COPY_MIME:
      /* Find end of coded data; without an end marker use all that is left */
      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_END);
      if (p == NULL)
        p = p_in + strlen((char *)p_in);

      while (p_in < p) {
        /* Decode all encoded data */
        if (enc == 'q') {
          /*
           * A "=XX" escape is decoded only when both hex digits lie
           * inside the coded text (p - p_in >= 3).  This keeps the
           * reads, and the 3-byte advance, from running past the end
           * marker or the string terminator.
           */
          if ((*p_in == '=') && ((p - p_in) >= 3) &&
              (qp_char(p_in[1], p_in[2], p_out) == 0)) {
            p_in += 3;
          }
          else if (*p_in == '_') {
            /*
             * RFC 2047: '_' inside encoded word represents 0x20.
             * NOT a space - always the value 0x20.
             */
            *p_out = 0x20;
            p_in++;
          }
          else {
            /* Plain char, or invalid QP data - pass through unchanged. */
            *p_out = *p_in;
            p_in++;
          }
          p_out++;
        }
        else if (enc == 'b') {
          /* Decode base64 encoded data */
          unsigned char delimsave;
          int decoded_count;

          /*
           * Temporarily put a '\r' at the end of the coded text for
           * the decoder, then restore the original byte.
           */
          delimsave = *p;
          *p = '\r';
          decoded_count = from64tobits(p_out, p_in);
          *p = delimsave;
          if (decoded_count > 0)
            p_out += decoded_count;
          p_in = p;
        }
        else {
          /* Unknown encoding - copy unchanged */
          *p_out = *p_in;
          p_in++;
          p_out++;
        }
      }
      if (*p_in)
        p_in += strlen(MIMEHDR_END);   /* Skip the MIMEHDR_END delimiter */

      /*
       * We've completed decoding one encoded sequence. But another
       * may follow immediately, in which case whitespace before the
       * new MIMEHDR_INIT delimiter must be discarded.
       * See if that is the case
       */
      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
      state = S_COPY_PLAIN;
      if (p != NULL) {
        /*
         * There is more MIME data later on. Is there
         * whitespace only before the delimiter?
         */
        unsigned char *q;
        int  wsp_only = 1;

        for (q = p_in; (wsp_only && (q < p)); q++)
          wsp_only = (isspace(*q) != 0);

        if (wsp_only) {
          /*
           * Whitespace-only before the MIME delimiter. OK,
           * just advance p_in to past the new MIMEHDR_INIT,
           * and prepare to process the new MIME charset/encoding
           * header.
           */
          p_in = p + strlen(MIMEHDR_INIT);
          state = S_SKIP_MIMEINIT;
        }
      }
      break;

    default:
      /* Not reachable with the states defined above; recover safely. */
      state = S_COPY_PLAIN;
      break;
    }
  }

  *p_out = '\0';
}



/*
 * Routines for decoding body-parts of a message.
 *
 * Since the "fetch" part of fetchmail gets a message body
 * one line at a time, we need to maintain some state variables
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all
 * headers have been received, and then UnMimeBodyline() for
 * every line in the message body.
 *
 */
/*
 * Values stored in BodyState.
 * NOTE(review): presumably S_BODY_DATA = inside body data and
 * S_BODY_HDR = inside the headers of a body part; the code that
 * switches between them is not in this part of the file - confirm.
 */
#define S_BODY_DATA 0
#define S_BODY_HDR  1

/* 
 * Flag indicating if we are currently processing 
 * the headers or the body of a (multipart) message.
 * Initially S_BODY_DATA.
 */
static int  BodyState = S_BODY_DATA;

/* 
 * Flag indicating if we are in the process of decoding
 * a quoted-printable body part. Initially 0 (not decoding).
 */
static int  CurrEncodingIsQP = 0;

/* 
 * Delimiter for multipart messages. RFC 2046 states that this must
 * NEVER be longer than 70 characters. Add 3 for the two hyphens
 * at the beginning, and a terminating null.
 */
#define MAX_DELIM_LEN 70
static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];


/*
 * Replacement text for a "Content-Transfer-Encoding: quoted-printable"
 * header, both in the message headers and in body-part headers.  It is
 * written over the original text in place, so it must be no longer
 * than the text it replaces.
 */
static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";

/*
 * Overwrite the header starting at XferEncOfs with ENC8BIT.
 *
 *   XferEncOfs : start of the header text to overwrite; NULL is a no-op.
 *
 * Whatever follows the replacement, up to the first byte below ' '
 * (a control character or the string terminator), is blanked with
 * spaces so that the total length of the text is unchanged.
 */
static void SetEncoding8bit(unsigned char *XferEncOfs)
{
  const size_t enc_len = sizeof(ENC8BIT) - 1;   /* length without the NUL */
  unsigned char *tail;

  if (XferEncOfs == NULL)
    return;

  memcpy(XferEncOfs, ENC8BIT, enc_len);

  /* If anything is left in this header, replace it with whitespace */
  tail = XferEncOfs + enc_len;
  while (*tail >= ' ') {
    *tail = ' ';
    tail++;
  }
}

/*
 * Extract the value of the "boundary" parameter from a Content-Type
 * header.
 *
 *   CntType : NUL-terminated header text.  It is modified in place:
 *             a NUL is written at the end of the boundary value.
 *
 * Returns a pointer into CntType at the start of the boundary value,
 * or NULL when CntType is NULL, when there is no ";boundary=" parameter,
 * when a quoted value has no closing quote, or when the value is empty.
 *
 * A quoted value is returned exactly as written between the quotes.
 * An unquoted value ends at the next ';' or at the end of the string,
 * with trailing whitespace (including CR/LF) removed - RFC 2046 does
 * not allow a boundary to end in whitespace.
 */
static char *GetBoundary(char *CntType)
{
  char *p1, *p2;
  int flag;

  if (CntType == NULL)
    return NULL;

  /* Find the "boundary" delimiter. It must be preceded with a ';'
   * and optionally some whitespace.
   *
   * The <ctype.h> functions are given unsigned char values throughout:
   * passing a negative plain char (any 8-bit byte where char is
   * signed) is undefined behaviour.
   */
  p1 = CntType;
  do {
    p2 = strchr(p1, ';');
    if (p2)
      for (p2++; isspace((unsigned char)*p2); p2++)
        ;

    p1 = p2;
  } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0));

  if (p1 == NULL)
    /* No boundary delimiter */
    return NULL;

  /* Skip "boundary", whitespace and '='; check that we do have a '=' */
  for (p1 += 8, flag = 0; (isspace((unsigned char)*p1) || (*p1 == '=')); p1++)
    flag |= (*p1 == '=');
  if (!flag)
    return NULL;

  /* Find end of boundary delimiter string and zero-terminate it */
  if (*p1 == '\"') {
    /* The delimiter is inside quotes */
    p1++;
    p2 = strchr(p1, '\"');
    if (p2 == NULL)
      return NULL;  /* No closing '"' !?! */
    *p2 = '\0';
  }
  else {
    /* There might be more text after the "boundary" string. */
    p2 = strchr(p1, ';');  /* Safe - delimiter with ';' must be in quotes */
    if (p2 == NULL)
      p2 = p1 + strlen(p1);

    /* Drop trailing whitespace, e.g. before the ';' or a final CRLF */
    while ((p2 > p1) && isspace((unsigned char)p2[-1]))
      p2--;

    /* Write only when it shortens the string */
    if (*p2 != '\0')
      *p2 = '\0';
  }

  return (*p1 != '\0') ? p1 : NULL;
}


/*
 * This routine does three things:
 * 1) It determines - based on the message headers - whether the
 *    message body is a MIME message that may hold 8 bit data.
 *    - A message that has a "quoted-printable" or "8bit" transfer 
 *      encoding is assumed to contain 8-bit data (when decoded).
 *    - A multipart message is assumed to contain 8-bit data
 *      when decoded (there might be quoted-printable body-parts).
 *    - All other messages are assumed NOT to include 8-bit data.
 * 2) It determines the delimiter-string used in multi-part message
 *    bodies.
 * 3) It sets the initial values of the CurrEncodingIsQP and BodyState
 *    variables, from the header contents.
 *
 * The return value is a bitmask.
 */
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -