📄 htmime.c
字号:
/*                      MIME Message Parse                      HTMIME.c
**                      ==================
**
**      This is RFC 1341-specific code.
**      The input stream pushed into this parser is assumed to be
**      stripped of CRs, i.e. lines end with LF, not CR LF.
**      (It is easy to change this except for the body part where
**      conversion can be slow.)
**
** History:
**         Feb 92       Written Tim Berners-Lee, CERN
**
*/
#include <HTUtils.h>
#include <HTMIME.h>             /* Implemented here */
#include <HTTP.h>               /* for redirecting_url */
#include <HTAlert.h>
#include <HTCJK.h>
#include <UCMap.h>
#include <UCDefs.h>
#include <UCAux.h>

#include <LYCookie.h>
#include <LYCharSets.h>
#include <LYCharUtils.h>
#include <LYStrings.h>
#include <LYUtils.h>
#include <LYLeaks.h>

/*              MIME Object
**              -----------
**
**      Parser states.  The mi* names between miBEGINNING_OF_LINE and
**      miWWW_AUTHENTICATE spell out header-name prefixes and complete
**      header names; the remaining values are value-collection and
**      pass-through states.  The order of the enumerators is kept as is.
*/
typedef enum {
    MIME_TRANSPARENT,           /* put straight through to target ASAP! */
    miBEGINNING_OF_LINE,        /* first character and not a continuation */
    miA,
    miACCEPT_RANGES,
    miAGE,
    miAL,
    miALLOW,
    miALTERNATES,
    miC,
    miCACHE_CONTROL,
    miCO,
    miCOOKIE,
    miCON,
    miCONNECTION,
    miCONTENT_,
    miCONTENT_BASE,
    miCONTENT_DISPOSITION,
    miCONTENT_ENCODING,
    miCONTENT_FEATURES,
    miCONTENT_L,
    miCONTENT_LANGUAGE,
    miCONTENT_LENGTH,
    miCONTENT_LOCATION,
    miCONTENT_MD5,
    miCONTENT_RANGE,
    miCONTENT_T,
    miCONTENT_TRANSFER_ENCODING,
    miCONTENT_TYPE,
    miDATE,
    miE,
    miETAG,
    miEXPIRES,
    miKEEP_ALIVE,
    miL,
    miLAST_MODIFIED,
    miLINK,
    miLOCATION,
    miP,
    miPR,
    miPRAGMA,
    miPROXY_AUTHENTICATE,
    miPUBLIC,
    miR,
    miRE,
    miREFRESH,
    miRETRY_AFTER,
    miS,
    miSAFE,
    miSE,
    miSERVER,
    miSET_COOKIE,
    miSET_COOKIE1,
    miSET_COOKIE2,
    miT,
    miTITLE,
    miTRANSFER_ENCODING,
    miU,
    miUPGRADE,
    miURI,
    miV,
    miVARY,
    miVIA,
    miW,
    miWARNING,
    miWWW_AUTHENTICATE,
    miSKIP_GET_VALUE,           /* Skip space then get value */
    miGET_VALUE,                /* Get value till white space */
    miJUNK_LINE,                /* Ignore the rest of this folded line */
    miNEWLINE,                  /* Just found a LF .. maybe continuation */
    miCHECK,                    /* check against check_pointer */
    MIME_NET_ASCII,             /* Translate from net ascii */
    MIME_IGNORE                 /* Ignore entire file */
    /* TRANSPARENT and IGNORE are defined as stg else in _WINDOWS */
} MIME_state;

/*
**  Capacity, in bytes, of the per-stream buffer that collects one
**  header value (see the `value' member below).
*/
#define VALUE_SIZE 5120         /* @@@@@@@ Arbitrary? */

/*
**  Per-stream state of the MIME header parser.
*/
struct _HTStream {
    CONST HTStreamClass *       isa;

    BOOL                        net_ascii;      /* Is input net ascii? */
    MIME_state                  state;          /* current state */
    MIME_state                  if_ok;          /* got this state if match */
    MIME_state                  field;          /* remember which field */
    MIME_state                  fold_state;     /* state on a fold */
    BOOL                        head_only;      /* only parsing header */
    BOOL                        pickup_redirection; /* parsing for location */
    BOOL                        no_streamstack; /* use sink directly */
    CONST char *                check_pointer;  /* checking input */

    char *                      value_pointer;  /* storing values */
    char                        value[VALUE_SIZE];

    HTParentAnchor *            anchor;         /* Given on creation */
    HTStream *                  sink;           /* Given on creation */

    char *                      boundary;       /* For multipart */
    char *                      set_cookie;     /* Set-Cookie */
    char *                      set_cookie2;    /* Set-Cookie2 */
    char *                      location;       /* Location */

    char *                      refresh_url;    /* "Refresh:" URL */

    HTFormat                    encoding;       /* Content-Transfer-Encoding */
    char *                      compression_encoding;
    HTFormat                    format;         /* Content-Type */
    HTStream *                  target;         /* While writing out */
    HTStreamClass               targetClass;

    HTAtom *                    targetRep;      /* Converting into? */
};

/*
**  This function is for trimming off any paired
**  open- and close-double quotes from header values.
**  It does not parse the string for embedded quotes,
**  and will not modify the string unless both the
**  first and last characters are double-quotes. - FM
**
**  The string is edited in place.  A NULL or empty value is left
**  alone.  A value consisting of a single double-quote has the same
**  character as its first and last one, so it becomes the empty string.
*/
PUBLIC void HTMIME_TrimDoubleQuotes ARGS1(
        char *,         value)
{
    size_t len;         /* strlen() yields size_t; avoid narrowing to int */
    size_t i;

    if (value == NULL || value[0] != '\"')
        return;

    len = strlen(value);        /* at least 1: value[0] is a quote */
    if (value[len - 1] != '\"')
        return;

    /*
    **  Drop the closing quote, then shift everything (terminator
    **  included) one place left over the opening quote.
    */
    value[len - 1] = '\0';
    for (i = 0; value[i] != '\0'; i++)
        value[i] = value[i + 1];
}

/*
**  Report whether the anchor carries a Content-Encoding that is
**  something other than the identity-like values "8bit", "7bit"
**  and "binary".  A missing (NULL) encoding counts as not compressed.
*/
PRIVATE BOOL content_is_compressed ARGS1(HTStream *, me)
{
    char *encoding = me->anchor->content_encoding;

    return encoding != 0
        && strcmp(encoding, "8bit") != 0
        && strcmp(encoding, "7bit") != 0
        && strcmp(encoding, "binary") != 0;
}

/*
 * Strip quotes from a refresh-URL.
 *
 * Only a matching pair of single quotes (') enclosing the whole string
 * is removed; the string is edited in place.  `url' must not be NULL.
 */
PRIVATE void dequote ARGS1(char *, url)
{
    size_t len;         /* strlen() yields size_t; avoid narrowing to int */

    len = strlen(url);
    if (*url == '\'' && len > 1 && url[len - 1] == url[0]) {
        url[len - 1] = '\0';
        /* Shift the remainder, terminator included, left by one. */
        while ((url[0] = url[1]) != '\0') {
            ++url;
        }
    }
}

/*
**  Act on the header information gathered in `me' and set up the
**  stream that the rest of the data is written to.
**
**  In order, this function:
**   - reduces an extended Content-Type ("type/subtype; params") to the
**     bare type, recording a charset parameter in the anchor when one
**     is present and usable;
**   - sets the anchor's no_cache flag from the Expires/Date values;
**   - copies the final Content-Type name into the anchor;
**   - hands any collected Set-Cookie/Set-Cookie2 values to LYSetCookie();
**   - when picking up a redirection, moves the Location value into
**     redirecting_url or alerts about a missing/empty Location;
**   - returns early in state MIME_IGNORE when only the head is wanted;
**   - selects me->target (the sink itself, or an HTStreamStack()
**     conversion falling back to the sink) and sets me->state;
**   - for an uncompressed WWW_HTML document with a Refresh URL, writes
**     a "Refresh:" link through me->isa->put_string.
**
**  Always returns HT_OK.
*/
PRIVATE int pumpData ARGS1(HTStream *, me)
{
    if (strchr(HTAtom_name(me->format), ';') != NULL) {
        char *cp = NULL, *cp1, *cp2, *cp3 = NULL, *cp4;

        CTRACE((tfp, "HTMIME: Extended MIME Content-Type is %s\n",
                HTAtom_name(me->format)));
        StrAllocCopy(cp, HTAtom_name(me->format));
        /*
        **  Note that the Content-Type value was converted
        **  to lower case when we loaded into me->format,
        **  but there may have been a mixed or upper-case
        **  atom, so we'll force lower-casing again.  We
        **  also stripped spaces and double-quotes, but
        **  we'll make sure they're still gone from any
        **  charset parameter we check. - FM
        */
        LYLowerCase(cp);
        if ((cp1 = strchr(cp, ';')) != NULL) {
            BOOL chartrans_ok = NO;
            if ((cp2 = strstr(cp1, "charset")) != NULL) {
                int chndl;

                cp2 += 7;
                while (*cp2 == ' ' || *cp2 == '=' || *cp2 == '\"')
                    cp2++;
                StrAllocCopy(cp3, cp2); /* copy to mutilate more */
                for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '\"' &&
                                 *cp4 != ';'  && *cp4 != ':' &&
                                 !WHITE(*cp4)); cp4++)
                    ; /* do nothing */
                *cp4 = '\0';
                cp4 = cp3;
                chndl = UCGetLYhndl_byMIME(cp3);
                if (UCCanTranslateFromTo(chndl, current_char_set)) {
                    chartrans_ok = YES;
                    *cp1 = '\0';
                    me->format = HTAtom_for(cp);
                    StrAllocCopy(me->anchor->charset, cp4);
                    HTAnchor_setUCInfoStage(me->anchor, chndl,
                                            UCT_STAGE_MIME,
                                            UCT_SETBY_MIME);
                } else if (chndl < 0) {/* got something but we don't recognize it */
                    chndl = UCLYhndl_for_unrec;
                    if (chndl < 0)
                        /*
                        **  UCLYhndl_for_unrec not defined :-(
                        **  fallback to UCLYhndl_for_unspec
                        **  which always valid.
                        */
                        chndl = UCLYhndl_for_unspec;  /* always >= 0 */
                    if (UCCanTranslateFromTo(chndl, current_char_set)) {
                        chartrans_ok = YES;
                        *cp1 = '\0';
                        me->format = HTAtom_for(cp);
                        HTAnchor_setUCInfoStage(me->anchor, chndl,
                                                UCT_STAGE_MIME,
                                                UCT_SETBY_DEFAULT);
                    }
                }
                if (chartrans_ok) {
                    /*
                    **  NOTE(review): p_in and p_out are dereferenced
                    **  below without NULL checks; this assumes the
                    **  UCInfoStage calls cannot fail here - confirm.
                    */
                    LYUCcharset * p_in =
                        HTAnchor_getUCInfoStage(me->anchor,
                                                UCT_STAGE_MIME);
                    LYUCcharset * p_out =
                        HTAnchor_setUCInfoStage(me->anchor,
                                                current_char_set,
                                                UCT_STAGE_HTEXT,
                                                UCT_SETBY_DEFAULT);
                    if (!p_out)
                        /*
                        **  Try again.
                        */
                        p_out =
                            HTAnchor_getUCInfoStage(me->anchor,
                                                    UCT_STAGE_HTEXT);

                    if (!strcmp(p_in->MIMEname,
                                "x-transparent")) {
                        HTPassEightBitRaw = TRUE;
                        HTAnchor_setUCInfoStage(me->anchor,
                                HTAnchor_getUCLYhndl(me->anchor,
                                                     UCT_STAGE_HTEXT),
                                                UCT_STAGE_MIME,
                                                UCT_SETBY_DEFAULT);
                    }
                    if (!strcmp(p_out->MIMEname,
                                "x-transparent")) {
                        HTPassEightBitRaw = TRUE;
                        HTAnchor_setUCInfoStage(me->anchor,
                                HTAnchor_getUCLYhndl(me->anchor,
                                                     UCT_STAGE_MIME),
                                                UCT_STAGE_HTEXT,
                                                UCT_SETBY_DEFAULT);
                    }
                    if (p_in->enc != UCT_ENC_CJK) {
                        HTCJK = NOCJK;
                        if (!(p_in->codepoints &
                              UCT_CP_SUBSETOF_LAT1) &&
                            chndl == current_char_set) {
                            HTPassEightBitRaw = TRUE;
                        }
                    } else if (p_out->enc == UCT_ENC_CJK) {
                        Set_HTCJK(p_in->MIMEname, p_out->MIMEname);
                    }
                } else {
                    /*
                    **  Cannot translate.
                    **  If according to some heuristic the given
                    **  charset and the current display character
                    **  both are likely to be like ISO-8859 in
                    **  structure, pretend we have some kind
                    **  of match.
                    */
                    BOOL given_is_8859
                        = (BOOL) (!strncmp(cp4, "iso-8859-", 9) &&
                                  isdigit(UCH(cp4[9])));
                    BOOL given_is_8859like
                        = (BOOL) (given_is_8859 ||
                                  !strncmp(cp4, "windows-", 8) ||
                                  !strncmp(cp4, "cp12", 4) ||
                                  !strncmp(cp4, "cp-12", 5));
                    BOOL given_and_display_8859like
                        = (BOOL) (given_is_8859like &&
                                  (strstr(LYchar_set_names[current_char_set],
                                          "ISO-8859") ||
                                   strstr(LYchar_set_names[current_char_set],
                                          "windows-")));

                    if (given_and_display_8859like) {
                        *cp1 = '\0';
                        me->format = HTAtom_for(cp);
                    }
                    if (given_is_8859) {
                        /* Cut the name after the digits of "iso-8859-N". */
                        cp1 = &cp4[10];
                        while (*cp1 &&
                               isdigit(UCH(*cp1)))
                            cp1++;
                        *cp1 = '\0';
                    }
                    if (given_and_display_8859like) {
                        StrAllocCopy(me->anchor->charset, cp4);
                        HTPassEightBitRaw = TRUE;
                    }
                    HTAlert(*cp4 ? cp4 : me->anchor->charset);
                }
                FREE(cp3);
            } else {
                /*
                **  No charset parameter is present.
                **  Ignore all other parameters, as
                **  we do when charset is present. - FM
                */
                *cp1 = '\0';
                me->format = HTAtom_for(cp);
            }
        }
        FREE(cp);
    }
    /*
    **  If we have an Expires header and haven't
    **  already set the no_cache element for the
    **  anchor, check if we should set it based
    **  on that header. - FM
    */
    if (me->anchor->no_cache == FALSE &&
        me->anchor->expires != NULL) {
        if (!strcmp(me->anchor->expires, "0")) {
            /*
             *  The value is zero, which we treat as
             *  an absolute no-cache directive. - FM
             */
            me->anchor->no_cache = TRUE;
        } else if (me->anchor->date != NULL) {
            /*
            **  We have a Date header, so check if
            **  the value is less than or equal to
            **  that. - FM
            */
            if (LYmktime(me->anchor->expires, TRUE) <=
                LYmktime(me->anchor->date, TRUE)) {
                me->anchor->no_cache = TRUE;
            }
        } else if (LYmktime(me->anchor->expires, FALSE) == 0) {
            /*
            **  We don't have a Date header, and
            **  the value is in past for us. - FM
            */
            me->anchor->no_cache = TRUE;
        }
    }
    StrAllocCopy(me->anchor->content_type,
                 HTAtom_name(me->format));

    if (me->set_cookie != NULL || me->set_cookie2 != NULL) {
        LYSetCookie(me->set_cookie,
                    me->set_cookie2,
                    me->anchor->address);
        FREE(me->set_cookie);
        FREE(me->set_cookie2);
    }
    if (me->pickup_redirection) {
        if (me->location && *me->location) {
            /* Ownership of the Location string moves to redirecting_url. */
            redirecting_url = me->location;
            me->location = NULL;
            if (me->targetRep != WWW_DEBUG || me->sink)
                me->head_only = YES;

        } else {
            permanent_redirection = FALSE;
            if (me->location) {
                CTRACE((tfp, "HTTP: 'Location:' is zero-length!\n"));
                HTAlert(REDIRECTION_WITH_BAD_LOCATION);
            }
            CTRACE((tfp, "HTTP: Failed to pick up location.\n"));
            if (me->location) {
                FREE(me->location);
            } else {
                HTAlert(REDIRECTION_WITH_NO_LOCATION);
            }
        }
    }
    if (me->head_only) {
        /* We are done! - kw */
        me->state = MIME_IGNORE;
        return HT_OK;
    }

    if (me->no_streamstack) {
        me->target = me->sink;
    } else {
        if (!me->compression_encoding) {
            CTRACE((tfp, "HTMIME: MIME Content-Type is '%s', converting to '%s'\n",
                    HTAtom_name(me->format), HTAtom_name(me->targetRep)));
        } else {
            /*
            **  Change the format to that for "www/compressed"
            **  and set up a stream to deal with it. - FM
            */
            CTRACE((tfp, "HTMIME: MIME Content-Type is '%s',\n",
                    HTAtom_name(me->format)));
            me->format = HTAtom_for("www/compressed");
            CTRACE((tfp, " Treating as '%s'. Converting to '%s'\n",
                    HTAtom_name(me->format), HTAtom_name(me->targetRep)));
            FREE(me->compression_encoding);
        }
        me->target = HTStreamStack(me->format, me->targetRep,
                                   me->sink , me->anchor);
        if (!me->target) {
            CTRACE((tfp, "HTMIME: Can't translate! ** \n"));
            me->target = me->sink;      /* Cheat */
        }
    }
    if (me->target) {
        me->targetClass = *me->target->isa;
        /*
        **  Check for encoding and select state from there,
        **  someday, but until we have the relevant code,
        **  from now push straight through. - FM
        */
        me->state = MIME_TRANSPARENT;   /* Pump rest of data right through */
    } else {
        me->state = MIME_IGNORE;        /* What else to do? */
    }
    if (me->refresh_url != NULL && !content_is_compressed(me)) {
        char *url = NULL;
        char *num = NULL;
        char *txt = NULL;
        CONST char *base = "";  /* FIXME: refresh_url may be relative to doc */

        LYParseRefreshURL(me->refresh_url, &num, &url);
        if (url != NULL && me->format == WWW_HTML) {
            CTRACE((tfp, "Formatting refresh-url as first line of result\n"));
            HTSprintf0(&txt, gettext("Refresh: "));
            /*
            **  NOTE(review): num is formatted with %s unchecked; this
            **  assumes LYParseRefreshURL() always sets it - confirm.
            */
            HTSprintf(&txt, gettext("%s seconds "), num);
            dequote(url);
            HTSprintf(&txt, "<a href=\"%s%s\">%s</a><br>", base, url, url);
            CTRACE((tfp, "URL %s%s\n", base, url));
            (me->isa->put_string)(me, txt);
            free(txt);
        }
        FREE(num);
        FREE(url);
    }
    return HT_OK;
}

PRIVATE int dispatchField ARGS1(HTStream *, me)
{
    int i, j;
    char *cp;

    *me->value_pointer = '\0';

    cp = me->value_pointer;
    while ((cp > me->value) && *(--cp) == ' ')  /* S/390 -- gil -- 0146 */
        /*
        **  Trim trailing spaces.
        */
        *cp = '\0';

    switch (me->field) {
    case miACCEPT_RANGES:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Accept-Ranges: '%s'\n",
                me->value));
        break;
    case miAGE:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Age: '%s'\n",
                me->value));
        break;
    case miALLOW:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Allow: '%s'\n",
                me->value));
        break;
    case miALTERNATES:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Alternates: '%s'\n",
                me->value));
        break;
    case miCACHE_CONTROL:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Cache-Control: '%s'\n",
                me->value));
        if (!(me->value && *me->value))
            break;
        /*
        **  Convert to lowercase and indicate in anchor. - FM
        */
        LYLowerCase(me->value);
        StrAllocCopy(me->anchor->cache_control, me->value);
        /*
        **  Check whether to set no_cache for the anchor. - FM
        */
        {
            char *cp1, *cp0 = me->value;

            while ((cp1 = strstr(cp0, "no-cache")) != NULL) {
                cp1 += 8;
                while (*cp1 != '\0' && WHITE(*cp1))
                    cp1++;
                if (*cp1 == '\0' || *cp1 == ';') {
                    me->anchor->no_cache = TRUE;
                    break;
                }
                cp0 = cp1;
            }
            if (me->anchor->no_cache == TRUE)
                break;
            cp0 = me->value;
            while ((cp1 = strstr(cp0, "max-age")) != NULL) {
                cp1 += 7;
                while (*cp1 != '\0' && WHITE(*cp1))
                    cp1++;
                if (*cp1 == '=') {
                    cp1++;
                    while (*cp1 != '\0' && WHITE(*cp1))
                        cp1++;
                    if (isdigit(UCH(*cp1))) {
                        cp0 = cp1;
                        while (isdigit(UCH(*cp1)))
                            cp1++;
                        if (*cp0 == '0' && cp1 == (cp0 + 1)) {
                            me->anchor->no_cache = TRUE;
                            break;
                        }
                    }
                }
                cp0 = cp1;
            }
        }
        break;
    case miCOOKIE:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Cookie: '%s'\n",
                me->value));
        break;
    case miCONNECTION:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Connection: '%s'\n",
                me->value));
        break;
    case miCONTENT_BASE:
        HTMIME_TrimDoubleQuotes(me->value);
        CTRACE((tfp, "HTMIME: PICKED UP Content-Base: '%s'\n",
                me->value));
        if (!(me->value && *me->value))
            break;
        /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -