📄 htparse.c
字号:
** /etc/junk/./fred becomes /etc/junk/fred**** but we should NOT change** http://fred.xxx.edu/../..**** or ../../albert.html*/PUBLIC void HTSimplify ARGS1( char *, filename){ char *p; char *q, *q1; if (filename == NULL) return; if (!(filename[0] && filename[1]) || filename[0] == '?' || filename[1] == '?' || filename[2] == '?') return; if (strchr(filename, '/') != NULL) { for (p = (filename + 2); *p; p++) { if (*p == '?') { /* ** We're still treating a ?searchpart as part of ** the path in HTParse() and scan(), but if we ** encounter a '?' here, assume it's the delimiter ** and break. We also could check for a parameter ** delimiter (';') here, but the current Fielding ** draft (wisely or ill-advisedly :) says that it ** should be ignored and collapsing be allowed in ** it's value). The only defined parameter at ** present is ;type=[A, I, or D] for ftp URLs, so ** if there's a "/..", "/../", "/./", or terminal ** '.' following the ';', it must be due to the ** ';' being an unescaped path character and not ** actually a parameter delimiter. - FM */ break; } if (*p == '/') { if ((p[1] == '.') && (p[2] == '.') && (p[3] == '/' || p[3] == '?' || p[3] == '\0')) { /* ** Handle "../", "..?" or "..". */ for (q = (p - 1); (q >= filename) && (*q != '/'); q--) /* ** Back up to previous slash or beginning of string. */ ; if ((q[0] == '/') && (strncmp(q, "/../", 4) && strncmp(q, "/..?", 4)) && !((q - 1) > filename && q[-1] == '/')) { /* ** Not at beginning of string or in a ** host field, so remove the "/xxx/..". */ q1 = (p + 3); p = q; while (*q1 != '\0') *p++ = *q1++; *p = '\0'; /* terminate */#ifdef NOTDEFINED /* ** Make sure filename has at least one slash. */ if (*filename == '\0') { *filename = '/'; *(filename + 1) = '\0'; }#endif /* NOTDEFINED */ /* ** Start again with previous slash. */ p = (q - 1); } } else if (p[1] == '.' && p[2] == '/') { /* ** Handle "./" by removing both characters. */ q = p; q1 = (p + 2); while (*q1 != '\0') *q++ = *q1++; *q = '\0'; /* terminate */ p--; } else if (p[1] == '.' && p[2] == '?') { /* ** Handle ".?" by removing the dot. */ q = (p + 1); q1 = (p + 2); while (*q1 != '\0') *q++ = *q1++; *q = '\0'; /* terminate */ p--; } else if (p[1] == '.' && p[2] == '\0') { /* ** Handle terminal "." by removing the character. */ p[1] = '\0'; } } } if (p >= filename + 2 && *p == '?' && *(p-1) == '.') { if (*(p-2) == '/') { /* ** Handle "/.?" by removing the dot. */ q = p - 1; q1 = p; while (*q1 != '\0') *q++ = *q1++; *q = '\0'; } else if (*(p-2) == '.' && p >= filename + 4 && *(p-3) == '/' && (*(p-4) != '/' || (p > filename + 4 && *(p-5) != ':'))) { /* ** Handle "xxx/..?" */ for (q = (p - 4); (q > filename) && (*q != '/'); q--) /* ** Back up to previous slash or beginning of string. */ ; if (*q == '/') { if (q > filename && *(q-1) == '/' && !(q > filename + 1 && *(q-1) != ':')) return; q++; } if (strncmp(q, "../", 3) && strncmp(q, "./", 2)) { /* ** Not after "//" at beginning of string or ** after "://", and xxx is not ".." or ".", ** so remove the "xxx/..". */ q1 = p; p = q; while (*q1 != '\0') *p++ = *q1++; *p = '\0'; /* terminate */ } } } }}/* Make Relative Name. HTRelative()** -------------------**** This function creates and returns a string which gives an expression of** one address as related to another. Where there is no relation, an absolute** address is retured.**** On entry,** Both names must be absolute, fully qualified names of nodes** (no anchor bits)**** On exit,** The return result points to a newly allocated name which, if** parsed by HTParse relative to relatedName, will yield aName.** The caller is responsible for freeing the resulting name later.***/PUBLIC char * HTRelative ARGS2( CONST char *, aName, CONST char *, relatedName){ char * result = NULL; CONST char *p = aName; CONST char *q = relatedName; CONST char * after_access = NULL; CONST char * path = NULL; CONST char * last_slash = NULL; int slashes = 0; for (; *p; p++, q++) { /* Find extent of match */ if (*p != *q) break; if (*p == ':') after_access = p+1; if (*p == '/') { last_slash = p; slashes++; if (slashes == 3) path=p; } } /* q, p point to the first non-matching character or zero */ if (!after_access) { /* Different access */ StrAllocCopy(result, aName); } else if (slashes < 3){ /* Different nodes */ StrAllocCopy(result, after_access); } else if (slashes == 3){ /* Same node, different path */ StrAllocCopy(result, path); } else { /* Some path in common */ int levels = 0; for (; *q && (*q != '#'); q++) if (*q == '/') levels++; result = typecallocn(char, 3*levels + strlen(last_slash) + 1); if (result == NULL) outofmem(__FILE__, "HTRelative"); result[0] = '\0'; for (; levels; levels--) strcat(result, "../"); strcat(result, last_slash+1); } CTRACE((tfp, "HTparse: `%s' expressed relative to\n `%s' is\n `%s'.\n", aName, relatedName, result)); return result;}/* Escape undesirable characters using % HTEscape()** -------------------------------------**** This function takes a pointer to a string in which** some characters may be unacceptable unescaped.** It returns a string which has these characters** represented by a '%' character followed by two hex digits.**** Unlike HTUnEscape(), this routine returns a calloced string.*/PRIVATE CONST unsigned char isAcceptable[96] =/* Bit 0 xalpha -- see HTFile.h** Bit 1 xpalpha -- as xalpha but with plus.** Bit 3 ... path -- as xpalphas but with /*/ /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ { 0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4, /* 2x !"#$%&'()*+,-./ */ 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */ 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */ 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */ 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{|}~ DEL */PRIVATE char *hex = "0123456789ABCDEF";#define ACCEPTABLE(a) ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))PUBLIC char * HTEscape ARGS2( CONST char *, str, unsigned char, mask){ CONST char * p; char * q; char * result; int unacceptable = 0; for (p = str; *p; p++) if (!ACCEPTABLE(UCH(TOASCII(*p)))) unacceptable++; result = typecallocn(char, p-str + unacceptable + unacceptable + 1); if (result == NULL) outofmem(__FILE__, "HTEscape"); for (q = result, p = str; *p; p++) { unsigned char a = TOASCII(*p); if (!ACCEPTABLE(a)) { *q++ = HEX_ESCAPE; /* Means hex commming */ *q++ = hex[a >> 4]; *q++ = hex[a & 15]; } else *q++ = *p; } *q++ = '\0'; /* Terminate */ return result;}/* Escape undesirable characters using % but space to +. HTEscapeSP()** -----------------------------------------------------**** This function takes a pointer to a string in which** some characters may be unacceptable unescaped.** It returns a string which has these characters** represented by a '%' character followed by two hex digits,** except that spaces are converted to '+' instead of %2B.**** Unlike HTUnEscape(), this routine returns a calloced string.*/PUBLIC char * HTEscapeSP ARGS2( CONST char *, str, unsigned char, mask){ CONST char * p; char * q; char * result; int unacceptable = 0; for (p = str; *p; p++) if (!(*p == ' ' || ACCEPTABLE(UCH(TOASCII(*p))))) unacceptable++; result = typecallocn(char, p-str + unacceptable + unacceptable + 1); if (result == NULL) outofmem(__FILE__, "HTEscape"); for (q = result, p = str; *p; p++) { unsigned char a = TOASCII(*p); if (a == 32) { *q++ = '+'; } else if (!ACCEPTABLE(a)) { *q++ = HEX_ESCAPE; /* Means hex commming */ *q++ = hex[a >> 4]; *q++ = hex[a & 15]; } else { *q++ = *p; } } *q++ = '\0'; /* Terminate */ return result;}/* Decode %xx escaped characters. HTUnEscape()** ------------------------------**** This function takes a pointer to a string in which some** characters may have been encoded in %xy form, where xy is** the acsii hex code for character 16x+y.** The string is converted in place, as it will never grow.*/PRIVATE char from_hex ARGS1( char, c){ return (char) ( c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F'? c - 'A' + 10 : c - 'a' + 10); /* accept small letters just in case */}PUBLIC char * HTUnEscape ARGS1( char *, str){ char * p = str; char * q = str; if (!(p && *p)) return str; while (*p != '\0') { if (*p == HEX_ESCAPE && /* * Tests shouldn't be needed, but better safe than sorry. */ p[1] && p[2] && isxdigit(UCH(p[1])) && isxdigit(UCH(p[2]))) { p++; if (*p) *q = (char) (from_hex(*p++) * 16); if (*p) { /* ** Careful! FROMASCII() may evaluate its arg more than once! */ /* S/390 -- gil -- 0221 */ *q = (char) (*q + from_hex(*p++)); } *q = FROMASCII(*q); q++; } else { *q++ = *p++; } } *q++ = '\0'; return str;} /* HTUnEscape *//* Decode some %xx escaped characters. HTUnEscapeSome()** ----------------------------------- Klaus Weide** (kweide@tezcat.com)** This function takes a pointer to a string in which some** characters may have been encoded in %xy form, where xy is** the acsii hex code for character 16x+y, and a pointer to** a second string containing one or more characters which** should be unescaped if escaped in the first string.** The first string is converted in place, as it will never grow.*/PUBLIC char * HTUnEscapeSome ARGS2( char *, str, CONST char *, do_trans){ char * p = str; char * q = str; char testcode; if (p == NULL || *p == '\0' || do_trans == NULL || *do_trans == '\0') return str; while (*p != '\0') { if (*p == HEX_ESCAPE && p[1] && p[2] && /* tests shouldn't be needed, but.. */ isxdigit(UCH(p[1])) && isxdigit(UCH(p[2])) && (testcode = (char) FROMASCII(from_hex(p[1])*16 + from_hex(p[2]))) && /* %00 no good*/ strchr(do_trans, testcode)) { /* it's one of the ones we want */ *q++ = testcode; p += 3; } else { *q++ = *p++; } } *q++ = '\0'; return str;} /* HTUnEscapeSome */PRIVATE CONST unsigned char crfc[96] =/* Bit 0 xalpha -- need "quoting"** Bit 1 xpalpha -- need \escape if quoted*/ /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ { 1,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0, /* 2x !"#$%&'()*+,-./ */ 0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0, /* 3x 0123456789:;<=>? */ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4x @ABCDEFGHIJKLMNO */ 0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0, /* 5X PQRSTUVWXYZ[\]^_ */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6x `abcdefghijklmno */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3 }; /* 7X pqrstuvwxyz{|}~ DEL *//*** Turn a string which is not a RFC 822 token into a quoted-string. - KW*/PUBLIC void HTMake822Word ARGS1( char **, str){ CONST char * p; char * q; char * result; unsigned char a; int added = 0; if (!(*str) || !(**str)) { StrAllocCopy(*str, "\"\""); return; } for (p = *str; *p; p++) { a = TOASCII(*p); /* S/390 -- gil -- 0240 */ if (a < 32 || a >= 128 || ((crfc[a-32]) & 1)) { if (!added) added = 2; if (a >= 160 || a == '\t') continue; if (a == '\r' || a == '\n') added += 2; else if ((a & 127) < 32 || ((crfc[a-32]) & 2)) added++; } } if (!added) return; result = typecallocn(char, p-(*str) + added + 1); if (result == NULL) outofmem(__FILE__, "HTMake822Word"); result[0] = '"'; /* ** Having converted the character to ASCII, we can't use symbolic ** escape codes, since they're in the host character set, which ** is not necessarily ASCII. Thus we use octal escape codes instead. ** -- gil (Paul Gilmartin) <pg@sweng.stortek.com> */ /* S/390 -- gil -- 0268 */ for (q = result + 1, p = *str; *p; p++) { a = TOASCII(*p); if ((a != '\011') && ((a & 127) < 32 || ( a < 128 && ((crfc[a-32]) & 2)))) *q++ = '\033'; *q++ = *p; if (a == '\012' || (a == '\015' && (TOASCII(*(p+1)) != '\012'))) *q++ = ' '; } *q++ = '"'; *q++ = '\0'; /* Terminate */ FREE(*str); *str = result;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -