⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htparse.c

📁 用于linux和其他unix下面的
💻 C
📖 第 1 页 / 共 2 页
字号:
/*		Parse HyperText Document Address		HTParse.c
**		================================
*/

#include <HTUtils.h>
#include <HTParse.h>

#include <LYLeaks.h>

#define HEX_ESCAPE '%'

/*
**  The pieces of a URL as located by scan().  Every non-NULL member points
**  INTO the (modified) string handed to scan(); nothing is allocated.
*/
struct struct_parts {
	char * access;		/* scheme, without the ':' */
	char * host;		/* text following "//" */
	char * absolute;	/* path following a leading '/' */
	char * relative;	/* path with no leading '/' */
	char * search;		/* treated normally as part of path */
	char * anchor;		/* fragment, without the '#' */
};


/*	Strip white space off a string.				HTStrip()
**	-------------------------------
**
** On entry,
**	s	points to a writable, zero terminated string.
**
** On exit,
**	Return value points to the first non-white character, or to the
**	terminating zero if the string holds nothing but white space.
**	All trailing white space is OVERWRITTEN with zero.
**
**  White space here means blank, tab and newline only.
*/
PUBLIC char * HTStrip ARGS1(
	char *,		s)
{
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n'))
    char * end = s + strlen(s);		/* one past the last character */

    /*
    **	Walk back from the end.  The pointer is compared BEFORE it is
    **	decremented so that it never moves in front of the buffer, even
    **	for an empty or all-blank string.
    */
    while (end > s && SPACE(end[-1]))
	*--end = '\0';			/* Zap trailing blanks */

    while (SPACE(*s))
	s++;				/* Strip leading blanks */

    return s;
}


/*	Scan a filename for its constituents.			scan()
**	-------------------------------------
**
** On entry,
**	name	points to a document name which may be incomplete.
**		The string is MODIFIED: separators are overwritten with
**		zero so that each part becomes its own string.
**	parts	is filled in by this function.
**
** On exit,
**	absolute or relative may be nonzero (but not both).
**	host, anchor and access may be nonzero if they were specified.
**	search is nonzero only when a host was ended directly by '?'
**	(no '/' after the host).
**	Any which are nonzero point to zero terminated strings inside name.
*/
PRIVATE void scan ARGS2(
	char *,			name,
	struct struct_parts *,	parts)
{
    char * after_access;
    char * p;
#ifdef NOTDEFINED
    int length = strlen(name);
#endif /* NOTDEFINED */

    parts->access = NULL;
    parts->host = NULL;
    parts->absolute = NULL;
    parts->relative = NULL;
    parts->search = NULL;	/* normally not used - kw */
    parts->anchor = NULL;

    /*
    **	Scan left-to-right for a scheme (access).
    **	A '/', '#', ';' or '?' seen before any ':' means there is no scheme.
    */
    after_access = name;
    for (p = name; *p; p++) {
	if (*p == ':') {
	    *p = '\0';
	    parts->access = name;	/* Access name has been specified */
	    after_access = (p + 1);
	    break;
	}
	if (*p == '/' || *p == '#' || *p == ';' || *p == '?')
	    break;
    }

#ifdef NOTDEFINED
    for (p = (name + length-1); p >= name; p--) {}
#endif /* NOTDEFINED */

    /*
    **	Scan left-to-right for a fragment (anchor).
    */
    for (p = after_access; *p; p++) {
	if (*p == '#') {
	    parts->anchor = (p + 1);
	    *p = '\0';		/* terminate the rest */
	    break;		/* leave things after first # alone - kw */
	}
    }

    /*
    **	Scan left-to-right for a host or absolute path.
    */
    p = after_access;
    if (*p == '/') {
	if (p[1] == '/') {
	    parts->host = (p + 2);	  /* host has been specified	*/
	    *p = '\0';			  /* Terminate access		*/
	    p = strchr(parts->host, '/'); /* look for end of host name if any */
	    if (p != NULL) {
		*p = '\0';			/* Terminate host */
		parts->absolute = (p + 1);	/* Root has been found */
	    } else {
		p = strchr(parts->host, '?');
		if (p != NULL) {
		    *p = '\0';			/* Terminate host */
		    parts->search = (p + 1);
		}
	    }
	} else {
	    parts->absolute = (p + 1);		/* Root found but no host */
	}
    } else {
	parts->relative = (*after_access) ?
			     after_access : NULL; /* NULL for "" */
    }

    /*
    **	Check schemes that commonly have unescaped hashes.
    */
    if (parts->access && parts->anchor) {
	if ((!parts->host && strcasecomp(parts->access, "lynxcgi")) ||
	    !strcasecomp(parts->access, "nntp") ||
	    !strcasecomp(parts->access, "snews") ||
	    !strcasecomp(parts->access, "news") ||
	    !strcasecomp(parts->access, "data")) {
	    /*
	     *	Access specified but no host and not a lynxcgi URL, so the
	     *	anchor may not really be one, e.g., news:j462#36487@foo.bar,
	     *	or it's an nntp or snews URL, or news URL with a host.
	     *	Restore the '#' in the address.
	     */
	    /* but only if we have found a path component of which this will
	     * become part. - kw  */
	    if (parts->relative || parts->absolute) {
		*(parts->anchor - 1) = '#';
		parts->anchor = NULL;
	    }
	}
    }

#ifdef NOT_DEFINED	/* search is just treated as part of path */
    {
	char *p = (relative ? relative : absolute);
	if (p != NULL) {
	    char *q = strchr(p, '?');	/* Any search string? */
	    if (q != NULL) {
		*q = '\0';		/* If so, chop that off. */
		parts->search = (q + 1);
	    }
	}
    }
#endif /* NOT_DEFINED */
} /*scan */


/*	Is this the default port of the scheme?		is_default_port()
**	---------------------------------------
**
** On entry,
**	scheme	access method name, e.g. "http" (compared case-sensitively).
**	port	port text INCLUDING the leading colon, e.g. ":80".
**
** On exit,
**	returns	nonzero if scheme is listed below and port is exactly the
**		text listed for it; zero otherwise.
*/
PRIVATE int is_default_port ARGS2(
	CONST char *,	scheme,
	CONST char *,	port)
{
    static CONST struct {
	CONST char * scheme;
	CONST char * port;
    } defaults[] = {
	{ "http",	":80"  },
	{ "https",	":443" },
	{ "gopher",	":70"  },
	{ "ftp",	":21"  },
	{ "wais",	":210" },
	{ "nntp",	":119" },
	{ "news",	":119" },
	{ "newspost",	":119" },
	{ "newsreply",	":119" },
	{ "snews",	":563" },
	{ "snewspost",	":563" },
	{ "snewsreply",	":563" },
	{ "finger",	":79"  },
	{ "telnet",	":23"  },
	{ "tn3270",	":23"  },
	{ "rlogin",	":513" },
	{ "cso",	":105" }
    };
    unsigned i;

    for (i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++) {
	/* Scheme names are unique, so the first match decides. */
	if (!strcmp(scheme, defaults[i].scheme))
	    return !strcmp(port, defaults[i].port);
    }
    return 0;
}


/*	Parse a Name relative to another name.			HTParse()
**	--------------------------------------
**
**	This returns those parts of a name which are given (and requested)
**	substituting bits from the related name where necessary.
**
** On entry,
**	aName		A filename given
**	relatedName	A name relative to which aName is to be parsed
**	wanted		A mask for the bits which are wanted (PARSE_ACCESS,
**			PARSE_HOST, PARSE_PATH, PARSE_STRICTPATH,
**			PARSE_QUERY, PARSE_ANCHOR, PARSE_PUNCTUATION).
**			PARSE_STRICTPATH and PARSE_QUERY together, or either
**			of them combined with PARSE_PATH, are treated as
**			plain PARSE_PATH.
**
** On exit,
**	returns		A pointer to an allocated string which MUST BE FREED
**
**  Neither input string is modified; working copies are cut up instead.
*/
PUBLIC char * HTParse ARGS3(
	CONST char *,	aName,
	CONST char *,	relatedName,
	int,		wanted)
{
    char * result = NULL;
    char * return_value = NULL;
    int len;
    char * name = NULL;
    char * rel = NULL;
    char * p;
    char * acc_method;
    struct struct_parts given, related;

    CTRACE((tfp, "HTParse: aName:`%s'\n", aName));
    CTRACE((tfp, "   relatedName:`%s'\n", relatedName));

    if (wanted & (PARSE_STRICTPATH | PARSE_QUERY)) { /* if detail wanted... */
	if ((wanted & (PARSE_STRICTPATH | PARSE_QUERY))
	    == (PARSE_STRICTPATH | PARSE_QUERY)) /* if strictpath AND query */
	    wanted |= PARSE_PATH; /* then treat as if PARSE_PATH wanted */
	if (wanted & PARSE_PATH) /* if PARSE_PATH wanted */
	    wanted &= ~(PARSE_STRICTPATH | PARSE_QUERY); /* ignore details */
    }

    /*
    **	Allocate the output string.  Every piece copied into it comes from
    **	one of the two inputs; the extra bytes cover the punctuation
    **	(":", "//", "/", "#") and the terminator.
    */
    len = strlen(aName) + strlen(relatedName) + 10;
    result = typecallocn(char, len);	/* Lots of space: more than enough */
    if (result == NULL) {
	outofmem(__FILE__, "HTParse");
    }

    /*
    **	Make working copies of the input strings to cut up.
    */
    StrAllocCopy(name, aName);
    StrAllocCopy(rel, relatedName);

    /*
    **	Cut up the strings into URL fields.
    */
    scan(name, &given);
    scan(rel,  &related);

    /*
    **	Handle the scheme (access) field.
    */
    if (given.access && given.host && !given.relative && !given.absolute) {
	if (!strcmp(given.access, "http") ||
	    !strcmp(given.access, "https") ||
	    !strcmp(given.access, "ftp")) {
	    /*
	    **	Assume root.
	    */
	    given.absolute = "";
	}
    }
    acc_method = given.access ? given.access : related.access;
    if (wanted & PARSE_ACCESS) {
	if (acc_method) {
	    strcat(result, acc_method);
	    if (wanted & PARSE_PUNCTUATION)
		strcat(result, ":");
	}
    }

    /*
    **	If different schemes, inherit nothing.
    **
    **	We'll try complying with RFC 1808 and
    **	the Fielding draft, and inherit nothing
    **	if both schemes are given, rather than
    **	only when they differ, except for
    **	file URLs - FM
    **
    **	After trying it for a while, it's still
    **	premature, IHMO, to go along with it, so
    **	this is back to inheriting for identical
    **	schemes whether or not they are "file".
    **	If you want to try it again yourself,
    **	uncomment the strcasecomp() below. - FM
    */
    if ((given.access && related.access) &&
	(/* strcasecomp(given.access, "file") || */
	 strcmp(given.access, related.access))) {
	related.host = NULL;
	related.absolute = NULL;
	related.relative = NULL;
	related.search = NULL;
	related.anchor = NULL;
    }

    /*
    **	Handle the host field.
    */
    if (wanted & PARSE_HOST) {
	if (given.host || related.host) {
	    char *tail = result + strlen(result);

	    if (wanted & PARSE_PUNCTUATION)
		strcat(result, "//");
	    strcat(result, given.host ? given.host : related.host);
#define CLEAN_URLS
#ifdef CLEAN_URLS
	    /*
	    **	Ignore default port numbers, and trailing dots on FQDNs,
	    **	which will only cause identical addresses to look different.
	    */
	    {
		char *p2, *h;

		/* Skip any "user@" prefix before looking for the port. */
		if ((p2 = strchr(result, '@')) != NULL)
		   tail = (p2 + 1);
		p2 = strchr(tail, ':');
		if (p2 != NULL && !isdigit(UCH(p2[1]))) {
		    /*
		    **	Colon not followed by a port number.
		    */
		    *p2 = '\0';
		}
		if (p2 != NULL && *p2 != '\0' && acc_method != NULL) {
		    /*
		    **	Port specified.
		    */
		    if (is_default_port(acc_method, p2))
			*p2 = '\0'; /* It is the default: ignore it */
		}
		if (p2 == NULL) {
		    int len2 = strlen(tail);

		    if (len2 > 0) {
			h = tail + len2 - 1;	/* last char of hostname */
			if (*h == '.')
			    *h = '\0';		/* chop final . */
		    }
		} else if (p2 != result) {
		    h = p2;
		    h--;		/* End of hostname */
		    if (*h == '.') {
			/*
			**  Slide p2 over h.
			*/
			while (*p2 != '\0')
			    *h++ = *p2++;
			*h = '\0';	/* terminate */
		    }
		}
	    }
#endif /* CLEAN_URLS */
	}
    }

    /*
    **	If host in given or related was ended directly with a '?' (no
    **  slash), fake the search part into absolute.  This is the only
    **  case search is returned from scan.  A host must have been present.
    **  this restores the '?' at which the host part had been truncated in
    **  scan, we have to do this after host part handling is done. - kw
    **
    */
    if (given.search && *(given.search - 1) == '\0') {
	given.absolute = given.search - 1;
	given.absolute[0] = '?';
    } else if (related.search && !related.absolute &&
	       *(related.search - 1) == '\0') {
	related.absolute = related.search - 1;
	related.absolute[0] = '?';
    }

    /*
    **	If different hosts, inherit no path.
    */
    if (given.host && related.host) {
	if (strcmp(given.host, related.host) != 0) {
	    related.absolute = NULL;
	    related.relative = NULL;
	    related.anchor = NULL;
	}
    }

    /*
    **	Handle the path.
    */
    if (wanted & (PARSE_PATH | PARSE_STRICTPATH | PARSE_QUERY)) {
	char *tail = NULL;
	int want_detail = (wanted & (PARSE_STRICTPATH | PARSE_QUERY));

	if (want_detail)
	    tail = result + strlen(result);	/* where the path will start */

	if (acc_method && !given.absolute && given.relative) {
	    if (!strcasecomp(acc_method, "nntp") ||
		!strcasecomp(acc_method, "snews") ||
		(!strcasecomp(acc_method, "news") &&
		 !strncasecomp(result, "news://", 7))) {
		/*
		 *  Treat all given nntp or snews paths,
		 *  or given paths for news URLs with a host,
		 *  as absolute.
		 */
		given.absolute = given.relative;
		given.relative = NULL;
	    }
	}

	if (given.absolute) {			/* All is given */
	    if (wanted & PARSE_PUNCTUATION)
		strcat(result, "/");
	    strcat(result, given.absolute);
	    CTRACE((tfp, "HTParse: (ABS)\n"));
	} else if (related.absolute) {		/* Adopt path not name */
	    /*
	    **	Remember where the path begins.  Looking for '?' and for the
	    **	last '/' from here, rather than from the start of result,
	    **	keeps a '?' in the host part from being mistaken for the
	    **	search part, and guarantees that the backward scan stops on
	    **	the '/' appended below instead of running off the front of
	    **	the buffer.
	    */
	    char *path_start = result + strlen(result);

	    strcat(result, "/");
	    strcat(result, related.absolute);
	    if (given.relative) {
		p = strchr(path_start, '?');	/* Search part? */
		if (p == NULL)
		    p = (result + strlen(result) - 1);
		for (; *p != '/'; p--)
		    ;				/* last / */
		p[1] = '\0';			/* Remove filename */
		strcat(result, given.relative); /* Add given one */
		HTSimplify (result);
	    }
	    CTRACE((tfp, "HTParse: (Related-ABS)\n"));
	} else if (given.relative) {
	    strcat(result, given.relative);		/* what we've got */
	    CTRACE((tfp, "HTParse: (REL)\n"));
	} else if (related.relative) {
	    strcat(result, related.relative);
	    CTRACE((tfp, "HTParse: (Related-REL)\n"));
	} else {  /* No inheritance */
	    if (strncasecomp(aName, "lynxcgi:", 8) &&
		strncasecomp(aName, "lynxexec:", 9) &&
		strncasecomp(aName, "lynxprog:", 9)) {
		strcat(result, "/");
	    }
	    if (!strcmp(result, "news:/"))
		result[5] = '*';
	    CTRACE((tfp, "HTParse: (No inheritance)\n"));
	}

	if (want_detail) {
	    /*
	    **	After the normalisation at the top of this function exactly
	    **	one of PARSE_STRICTPATH / PARSE_QUERY is set here.
	    */
	    p = strchr(tail, '?');	/* Search part? */
	    if (p) {
		if (wanted & PARSE_STRICTPATH) {
		    *p = '\0';		/* keep the path, drop the query */
		} else {
		    /* Keep only the query: slide it down over the path. */
		    if (!(wanted & PARSE_PUNCTUATION))
			p++;		/* leave the '?' out */
		    do {
			*tail++ = *p;
		    } while (*p++);
		}
	    } else {
		if (wanted & PARSE_QUERY)
		    *tail = '\0';	/* no query present: return nothing */
	    }
	}
    }

    /*
    **	Handle the fragment (anchor).  An empty given anchor ("#") neither
    **	produces output nor lets the related anchor be inherited.
    */
    if (wanted & PARSE_ANCHOR) {
	if ((given.anchor && *given.anchor) ||
	    (!given.anchor && related.anchor)) {
	    if (wanted & PARSE_PUNCTUATION)
		strcat(result, "#");
	    strcat(result, (given.anchor) ?
			     given.anchor : related.anchor);
	}
    }
    CTRACE((tfp, "HTParse:      result:%s\n", result));
    FREE(rel);
    FREE(name);

    StrAllocCopy(return_value, result);
    FREE(result);

    return return_value;		/* exactly the right length */
}


/*	Simplify a filename.				HTSimplify()
**	--------------------
**
**  A unix-style file is allowed to contain the seqeunce xxx/../ which may
**  be replaced by "" , and the seqeunce "/./" which may be replaced by "/".
**  Simplification helps us recognize duplicate filenames.
**
**	Thus,	/etc/junk/../fred	becomes /etc/fred

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -