📄 parse.c
字号:
#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <ctype.h>#include <errno.h>#include "main.h"#include "struct.h"#define MAXLINE 4096typedef enum { ENCODE_UNSET, /* not set, act NORMAL (ho ho ho) */ ENCODE_NORMAL, ENCODE_QP, /* quoted printable */ ENCODE_MULTILINED, /* this is not a real type, but just a separator showing that the types below are encoded in a way that makes one line in the indata may become one or more lines in the outdata */ ENCODE_BASE64, /* base64 */ ENCODE_UUENCODE, /* well, it seems there exist some kind of semi-standard for uu-encoded attachments. */ ENCODE_UNKNOWN /* must be the last one */} EncodeType;typedef enum { CONTENT_TEXT, /* normal mails are text based default */ CONTENT_BINARY, /* this kind we store separately and href to */ CONTENT_HTML, /* this is html formated text */ CONTENT_IGNORE, /* don't care about this content */ CONTENT_UNKNOWN /* must be the last one */} ContentType;int preferedcontent(char *type){ if(!strcasecmp(type, "text/plain")) return 1; return 0;}/*** strcasestr() - case insensitive strstr()*/ char *strcasestr(char *haystack, char *needle){ int nlen = strlen(needle); int hlen = strlen(haystack); int i; int max; max = hlen-nlen; for (i=0; i<=max; i++) { if (!strncasecmp(haystack, needle, nlen)) return haystack; haystack++; } return NULL;}/*** RFC 2047 defines MIME extensions for mail headers.**** This function decodes that into binary/8bit data.**** Example:** =?iso-8859-1?q?I'm_called_?= =?iso-8859-1?q?Daniel?=**** Should result in "I'm called Daniel", but:**** =?iso-8859-1?q?I'm_called?= Daniel**** Should result in "I'm called Daniel" too.**** Returns the newly allcated string, or the previous if nothing changed */static char *mdecodeRFC2047( char *string, int length ){ char *iptr = string; char *oldptr; char *storage=malloc(length+1); char *output = storage; char charset[129]; char encoding[33]; char blurb[257]; char equal; int value; char didanything=FALSE; while (*iptr) { if (!strncmp(iptr, "=?", 2) && (3 == sscanf(iptr+2, "%128[^?]?%32[^?]?%256[^ ]", charset, encoding, blurb)) ) { /* This is a full, valid 'encoded-word'. Decode! */ char *ptr=blurb; ptr = strstr(blurb, "?="); if(ptr) { *ptr=0; } else { *output++ = *iptr++; /* it wasn't a real encoded-word */ continue; } ptr = blurb; didanything=TRUE; /* yes, we decode something */ /* we could've done this with a %n in the sscanf, but we know all sscanfs don't grok that */ iptr += 2+ strlen(charset) + 1 + strlen(encoding) + 1 + strlen(blurb) + 2; if (!strcasecmp("q", encoding)) { /* quoted printable decoding */ for ( ; *ptr; ptr++ ) { switch ( *ptr ) { case '=': sscanf( ptr+1, "%02X", &value ); *output++ = value; ptr += 2; break; case '_': *output++ = ' '; break; default: *output++ = *ptr; break; } } } else if (!strcasecmp("b", encoding)) { /* base64 decoding */ int length; base64Decode(ptr, output, &length); output += length-1; } else { /* unsupported encoding type */ strcpy(output, "<unknown>"); output += 9; } oldptr = iptr; /* save start position */ while (*iptr && isspace(*iptr)) iptr++; /* pass all whitespaces */ /* if this is an encoded word here, we should skip the passed whitespaces. If it isn't an encoded-word, we should include the whitespaces in the output. */ if (!strncmp(iptr, "=?", 2) && (4 == sscanf(iptr+2, "%128[^?]?%32[^?]?%128[^?]?%c", charset, encoding, blurb, &equal)) && ('=' == equal)) { continue; /* this IS an encoded-word, continue from here */ } else /* this IS NOT an encoded-word, move back to the first whitespace */ iptr = oldptr; } else *output++ = *iptr++; } *output=0; if (didanything) { /* this check prevents unneccessary strsav() calls if not needed */ free(string); /* free old memory */ #if 0 /* debug display */ printf("NEW: %s\n", storage); { unsigned char *f; puts("NEW:"); for (f=storage; f<output; f++) { if (isgraph(*f)) printf("%c", *f); else printf("%02X", (unsigned char)*f); } puts(""); } #endif return storage; /* return new */ } else { free (storage); return string; }}/*** Decode this [virtual] Quoted-Printable line as defined by RFC2045.** Written by Daniel.Stenberg@haxx.nu*/static void mdecodeQP(FILE *file, char *input, char **result, int *length){ int outcount=0; char *buffer=input; unsigned char inchar; char *output; int len=strlen(input); output=strdup(input); while ((inchar = *input) != '\0') { if (outcount>=len-1) { /* we need to enlarge the destination area! */ /* double the size each time enlargement is needed */ char *newp = realloc(output, len*2); if (newp) { output = newp; len *= 2; } else break; } input++; if ('=' == inchar) { int value; if (('\n'== *input) || (('\r' == input[0]) && ('\n' == input[1]))) { if (!fgets(buffer, MAXLINE, file)) break; input = buffer; continue; } else if ('=' == *input) { inchar='='; input++; /* pass this */ } else if (isxdigit(*input)) { sscanf(input, "%02X", &value); inchar = (unsigned char)value; input+=2; /* pass the two letters */ } else inchar='='; } output[outcount++] = inchar; } output[outcount]=0; /* zero terminate */ *result = output; *length = outcount;}/*** Parsing...** This loads in the mail from stdin or a file, adding the right** field variables to the right structures. If readone is set, it will** think anything it reads in is one article only.*/struct body * process(char *mbox, /* file name */ int use_stdin, /* read from stdin */ int readone) /* only one mail */{ char line[MAXLINE]; char *cp, *dp; FILE *fp; int num, isinheader; /* -- variables for the multipart/alternative parser -- */ struct body *origbp=NULL; /* store the original bp */ struct body *origlp=NULL; /* ... and the original lp */ char alternativeparser=FALSE; /* set when inside alternative parser mode */ /* -- end of alternative parser variables -- */ struct body *bp; struct body *lp=NULL; /* the last pointer, points to the last node in the body list. Initially set to NULL since we have none at the moment. */ struct body *headp=NULL; /* stored pointer to the point where we last scanned the headers of this mail. */ char Mime_B = FALSE; char boundbuffer[128]=""; struct boundary *boundp=NULL; /* This variable is used to store a stack of boundary separators in cases with mimed mails inside mimed mails */ char multilinenoend=FALSE; /* This variable is set TRUE if we have read a partial line off a multiline-encoded line, and the next line we read is supposed to get appended to the previous one */ int bodyflags=0; /* This variable is set to extra flags that the addbody() calls should OR in the flag parameter */ char *binname=NULL; /* file name to store binary attachments in */ int binfile=-1; char *boundary; char type[129]; /* for Content-Type */ EncodeType decode=ENCODE_UNSET; ContentType content=CONTENT_TEXT; if (use_stdin || !mbox || !strcasecmp(mbox, "NONE")) fp = stdin; else if ((fp = fopen(mbox, "r")) == NULL) { return; /* add error code */ } isinheader = 1; bp = NULL; while (fgets(line, MAXLINE, fp) != NULL) {#if 0 printf("IN: %s", line);#endif if (isinheader) { /* check for MIME */ if (!strncasecmp( line, "MIME-Version:", 13)) Mime_B = TRUE; else if (isspace(line[0]) && ('\n' != line[0]) && ('\r' != line[0])) { /* ** since this begins with a whitespace, it means the ** previous line is continued on this line, leave only ** one space character and go! */ char *ptr=line; while (isspace(*ptr)) ptr++; ptr--; /* leave one space */ *ptr=' '; /* make it a true space, no tabs here! */#if 0 decodeRFC2047(ptr+1, MAXLINE-(ptr+2-line));#endif bp = addbody(bp, &lp, ptr, BODY_CONTINUE|BODY_HEADER|bodyflags); } else if ((line[0] == '\n') || (line[0] == '\r')) { struct body *head; char savealternative; /* ** we mark this as a header-line, and we use it to ** track end-of-header displays */ bp = addbody(bp, &lp, line, BODY_HEADER|bodyflags); isinheader--;#if 0 printf("HEADER status: %d\n", isinheader);#endif /* ** This signals us that we are no longer in the header, ** let's fill in all those fields we are interested in. ** Parse the headers up to now and copy to the target ** variables */ for (head = bp; head; head=head->next) { if (head->header && !head->demimed) { head->line = mdecodeRFC2047(head->line, strlen(head->line)); head->demimed=TRUE; /* don't do this again */ } } if (!headp) headp=bp; savealternative = FALSE; for (head = headp; head; head=head->next) { if(head->parsedheader || !head->header) continue; if (!strncasecmp( head->line, "Content-Type:", 13)) { char *ptr=head->line+13;#define DISP_HREF 1#define DISP_IMG 2#define DISP_IGNORE 3 /* default is href to the attachment: */ char disposition=DISP_HREF; /* we must make sure this is not parsed more times than this */ head->parsedheader= TRUE; while (isspace(*ptr)) ptr++; sscanf(ptr, "%128[^;]", type); if ((cp = strchr(type, '\r')) != NULL) *cp = '\0'; /* rm CR */ if ((cp = strchr(type, '\n')) != NULL) *cp = '\0'; /* rm LF */ if(alternativeparser) { /* We are parsing alternatives... */ if(preferedcontent(type) ) { /* ... this is a prefered type, we want to store this [instead of the earlier one]. */#if 0 struct body *next; printf("%s is more fun than the previous one\n", type);#endif#if 0 /* ** Not sure why this free section is here. ** It is causing purify to barf with massive numbers of ** "FMR: Free memory reads". When I commented it out it ** cleared up the problem with no associated memory leaked ** or difference in output. It's history for now. */ while(bp) { next=bp->next; if (bp->line) free(bp->line);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -