uuscan.c

来自「UUDeview是一个编码解码器」· C语言 代码 · 共 2,492 行 · 第 1/5 页

C
2,492
字号
/* * This file is part of uudeview, the simple and friendly multi-part multi- * file uudecoder  program  (c) 1994-2001 by Frank Pilhofer. The author may * be contacted at fp@fpx.de * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. *//* * These are very central functions of UUDeview. Here, we scan a file * and decide whether it contains encoded data or not. ScanPart() must * be called repeatedly on the same file until feof(file). Each time, * it returns information about the next part found within. */#ifdef HAVE_CONFIG_H#include "config.h"#endif#ifdef SYSTEM_WINDLL#include <windows.h>#endif#ifdef SYSTEM_OS2#include <os2.h>#endif#include <stdio.h>#include <ctype.h>#ifdef STDC_HEADERS#include <stdlib.h>#include <string.h>#endif#ifdef HAVE_MALLOC_H#include <malloc.h>#endif#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#ifdef HAVE_MEMORY_H#include <memory.h>#endif#ifdef HAVE_ERRNO_H#include <errno.h>#endif#include <uudeview.h>#include <uuint.h>#include <fptools.h>#include <uustring.h>char * uuscan_id = "$Id: uuscan.c,v 1.46 2004/03/01 22:52:27 fp Exp $";/* * Header fields we recognize as such. See RFC822. We add "From ", * the usual marker for a beginning of a new message, and a couple * of usual MDA, News and MIME headers. * We make a distinction of MIME headers as we need the difference * to scan the bodies from partial multipart messages. */static char *knownmsgheaders[] = {  "From ", "Return-Path:", "Received:", "Reply-To:",  "From:", "Sender:", "Resent-Reply-To:", "Resent-From:",  "Resent-Sender:", "Date:", "Resent-Date:", "To:",  "Resent-To:", "Cc:", "Bcc:", "Resent-bcc:",  "Message-ID:", "Resent-Message-Id:", "In-Reply-To:",  "References:", "Keywords:", "Subject:", "Comments:",    "Delivery-Date:", "Posted-Date:", "Received-Date:",  "Precedence:",   "Path:", "Newsgroups:", "Organization:", "Lines:",  "NNTP-Posting-Host:",  NULL};static char *knownmimeheaders[] = {  "Mime-Version:",  "Content-Transfer-Encoding:",  "Content-Type:", "Content-Disposition:",   "Content-Description:", "Content-Length:",  NULL};/* * for MIME (plaintext) parts without filename */int mimseqno;/* * how many lines do we read when checking for headers */#define WAITHEADER	10/* * The stack for encapsulated Multipart messages */#define MSMAXDEPTH	3int       mssdepth = 0;scanstate multistack[MSMAXDEPTH+1];/* * The state and the local envelope */headers   localenv;scanstate sstate;/* * mallocable areas */char *uuscan_shlline;char *uuscan_shlline2;char *uuscan_pvvalue;char *uuscan_phtext;char *uuscan_sdline;char *uuscan_sdbhds1;char *uuscan_sdbhds2;char *uuscan_spline;/* * Macro: print cancellation message in UUScanPart */#define SPCANCEL()	{UUMessage(uuscan_id,__LINE__,UUMSG_NOTE,uustring(S_SCAN_CANCEL));*errcode=UURET_CANCEL;goto ScanPartEmergency;}/* * Is line empty? A line is empty if it is composed of whitespace. */static intIsLineEmpty (char *data){  if (data == NULL) return 0;  while (*data && isspace (*data)) data++;  return ((*data)?0:1);}/* * Is this a header line? A header line has alphanumeric characters * followed by a colon. */static intIsHeaderLine (char *data){  if (data == NULL) return 0;  if (*data == ':') return 0;  while (*data && (isalnum (*data) || *data=='-')) data++;  return (*data == ':') ? 1 : 0;}/* * Scans a potentially folded header line from the input file. If * initial is non-NULL, it is the first line of the header, useful * if the calling function just coincidentally noticed that this is * a header. * RFC0822 does not specify a maximum length for headers, but we * truncate everything beyond an insane value of 1024 characters. */static char *ScanHeaderLine (FILE *datei, char *initial){  char *ptr=uuscan_shlline;  char *ptr2, *p1, *p2, *p3;  int llength, c;  long curpos;  int hadcr;  if (initial) {    _FP_strncpy (uuscan_shlline, initial, 1024);  }  else {    /* read first line */    if (feof (datei) || ferror (datei))      return NULL;    if (_FP_fgets (uuscan_shlline, 1023, datei) == NULL)      return NULL;    uuscan_shlline[1023] = '\0';  }  llength = strlen (uuscan_shlline);  hadcr   = 0;  /* strip whitespace at end */  ptr = uuscan_shlline + llength;  while (llength && isspace(*(ptr-1))) {    if (*(ptr-1) == '\012' || *(ptr-1) == '\015')      hadcr = 1;    ptr--; llength--;  }  if (llength == 0) {    uuscan_shlline[0] = '\0';    return uuscan_shlline;  }  while (!feof (datei)) {    c = fgetc (datei);    if (feof (datei))      break;    /*     * If the line didn't have a CR, it was longer than 256 characters     * and is continued anyway.     */    if (hadcr==1 && c != ' ' && c != '\t') {      /* no LWSP-char, header line does not continue */      ungetc (c, datei);      break;    }    while (!feof (datei) && (c == ' ' || c == '\t'))      c = fgetc (datei);    if (!feof (datei))      ungetc (c, datei);	/* push back for fgets() */    /* insert a single LWSP */    if (hadcr==1 && llength < 1023) {      *ptr++ = ' ';      llength++;    }    *ptr = '\0'; /* make lint happier */    if (feof (datei))      break;    /* read next line */    curpos = ftell (datei);    if (_FP_fgets (uugen_inbuffer, 255, datei) == NULL)      break;    uugen_inbuffer[255] = '\0';    if (IsLineEmpty (uugen_inbuffer)) { /* oops */      fseek (datei, curpos, SEEK_SET);      break;    }    _FP_strncpy (ptr, uugen_inbuffer, 1024-llength);    /*     * see if line was terminated with CR. Otherwise, it continues ...     */    c = strlen (ptr);    if (c>0 && (ptr[c-1] == '\012' || ptr[c-1] == '\015'))      hadcr = 1;    else      hadcr = 0;    /*     * strip whitespace     */    ptr     += c;    llength += c;    while (llength && isspace(*(ptr-1))) {      ptr--; llength--;    }  }  *ptr = '\0';  if (llength == 0)    return NULL;  /*   * Now that we've read the header line, we can RFC 1522-decode it   */  ptr = uuscan_shlline;  ptr2 = uuscan_shlline2;  while (*ptr) {    /*     * Look for =? magic     */    if (*ptr == '=' && *(ptr+1) == '?') {      /*       * Let p1 point to the charset, look for next question mark       */      p1 = p2 = ptr+2;      while (*p2 && *p2 != '?') {	p2++;      }      if (*p2 == '?' &&	  (*(p2+1) == 'q' || *(p2+1) == 'Q' ||	   *(p2+1) == 'b' || *(p2+1) == 'B') &&	  *(p2+2) == '?') {	/*	 * Let p2 point to the encoding, look for ?= magic	 */	p2++;	p3=p2+2;	while (*p3 && (*p3 != '?' || *(p3+1) != '=')) {	  p3++;	}	if (*p3 == '?' && *(p3+1) == '=') {	  /*	   * Alright, we've found an RFC 1522 header field	   */	  if (*p2 == 'q' || *p2 == 'Q') {	    c = UUDecodeField (p2+2, ptr2, QP_ENCODED);	  }	  else if (*p2 == 'b' || *p2 == 'B') {	    c = UUDecodeField (p2+2, ptr2, B64ENCODED);	  }	  if (c >= 0) {	    ptr2 += c;	    ptr = p3+2;	    continue;	  }	}      }    }    *ptr2++ = *ptr++;  }  *ptr2 = 0;  return uuscan_shlline2;}/* * Extract the value from a MIME attribute=value pair. This function * receives a pointer to the attribute. */static char *ParseValue (char *attribute){  char *ptr=uuscan_pvvalue;  int length=0;  if (attribute == NULL)    return NULL;  while ((isalnum(*attribute) || *attribute=='_') && *attribute != '=')    attribute++;  while (isspace(*attribute))    attribute++;  if (*attribute == '=') {    attribute++;    while (isspace (*attribute))      attribute++;  }  else    return NULL;  if (*attribute == '"') {    /* quoted-string */    attribute++;    while (*attribute && *attribute != '"' && length < 255) {      *ptr++ = *attribute++;      length++;    }    *ptr = '\0';  }  else {    /* tspecials from RFC1521 */    /*     * Note - exclude '[', ']' and ';' on popular request; these are     * used in some Content-Type fields by the Klez virus, and people     * who feed their virus scanners with the output of UUDeview would     * like to catch it!     */    while (*attribute && !isspace (*attribute) &&	   *attribute != '(' && *attribute != ')' &&	   *attribute != '<' && *attribute != '>' &&	   *attribute != '@' && *attribute != ',' &&	   /* *attribute != ';' && */ *attribute != ':' &&	   *attribute != '\\' &&*attribute != '"' &&	   *attribute != '/' && /* *attribute != '[' &&	   *attribute != ']' && */ *attribute != '?' &&	   *attribute != '=' && length < 255) {      *ptr++ = *attribute++;      length++;    }    *ptr = '\0';  }  return uuscan_pvvalue;}/* * Extract the information we need from header fields */static headers *ParseHeader (headers *theheaders, char *line){  char **variable=NULL;  char *value, *ptr, *thenew;  int delimit, length;  if (line == NULL)    return theheaders;  if (_FP_strnicmp (line, "From:", 5) == 0) {    if (theheaders->from) return theheaders;    variable = &theheaders->from;    value    = line+5;    delimit  = 0;  }  else if (_FP_strnicmp (line, "Subject:", 8) == 0) {    if (theheaders->subject) return theheaders;    variable = &theheaders->subject;    value    = line+8;    delimit  = 0;  }  else if (_FP_strnicmp (line, "To:", 3) == 0) {    if (theheaders->rcpt) return theheaders;    variable = &theheaders->rcpt;    value    = line+3;    delimit  = 0;  }  else if (_FP_strnicmp (line, "Date:", 5) == 0) {    if (theheaders->date) return theheaders;    variable = &theheaders->date;    value    = line+5;    delimit  = 0;  }  else if (_FP_strnicmp (line, "Mime-Version:", 13) == 0) {    if (theheaders->mimevers) return theheaders;    variable = &theheaders->mimevers;    value    = line+13;    delimit  = 0;  }  else if (_FP_strnicmp (line, "Content-Type:", 13) == 0) {    if (theheaders->ctype) return theheaders;    variable = &theheaders->ctype;    value    = line+13;    delimit  = ';';    /* we can probably extract more information */    if ((ptr = _FP_stristr (line, "boundary")) != NULL) {      if ((thenew = ParseValue (ptr))) {	if (theheaders->boundary) free (theheaders->boundary);	theheaders->boundary = _FP_strdup (thenew);      }    }    if ((ptr = _FP_stristr (line, "name")) != NULL) {      if ((thenew = ParseValue (ptr))) {	if (theheaders->fname) free (theheaders->fname);	theheaders->fname = _FP_strdup (thenew);      }    }    if ((ptr = _FP_stristr (line, "id")) != NULL) {      if ((thenew = ParseValue (ptr))) {	if (theheaders->mimeid) free (theheaders->mimeid);	theheaders->mimeid = _FP_strdup (thenew);      }    }    if ((ptr = _FP_stristr (line, "number")) != NULL) {      if ((thenew = ParseValue (ptr))) {	theheaders->partno = atoi (thenew);      }    }    if ((ptr = _FP_stristr (line, "total")) != NULL) {      if ((thenew = ParseValue (ptr))) {	theheaders->numparts = atoi (thenew);      }    }  }  else if (_FP_strnicmp (line, "Content-Transfer-Encoding:", 26) == 0) {    if (theheaders->ctenc) return theheaders;    variable = &theheaders->ctenc;    value    = line+26;    delimit  = ';';  }  else if (_FP_strnicmp (line, "Content-Disposition:", 20) == 0) {    /*     * Some encoders mention the original filename as parameter to     * Content-Type, others as parameter to Content-Disposition. We     * do prefer the first solution, but accept this situation, too.     * TODO: Read RFC1806     */    if ((ptr = _FP_stristr (line, "name")) != NULL) {      if (theheaders->fname == NULL && (thenew=ParseValue(ptr)) != NULL) {	theheaders->fname = _FP_strdup (thenew);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?