⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htparse.c

📁 elinks和lynx是最重要的两个文本浏览器，在linux下非常实用。lynx比elinks早得多，目前好像已停止开发。这是lynx的源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
/*              Parse HyperText Document Address                HTParse.c
**              ================================
*/

#include <HTUtils.h>
#include <HTParse.h>

#include <LYUtils.h>
#include <LYLeaks.h>
#include <LYStrings.h>
#include <LYCharUtils.h>

#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#else
#ifdef __MINGW32__
#include <malloc.h>
#endif /* __MINGW32__ */
#endif

#define HEX_ESCAPE '%'

/*
**  Pieces of an address as located by scan().  Every non-NULL member
**  points into the (modified) buffer that was handed to scan(); nothing
**  here is separately allocated.
*/
struct struct_parts {
    char * access;      /* scheme, i.e. the text before the first ':' */
    char * host;        /* text after "//" up to the next '/' (or '?') */
    char * absolute;    /* path following a leading '/' */
    char * relative;    /* path that does not begin with '/' */
    char * search;      /* treated normally as part of path */
    char * anchor;      /* text after the first '#' */
};

/*      Strip white space off a string.                         HTStrip()
**      -------------------------------
**
** On entry,
**      s       points to a writable, NUL-terminated string.
**
** On exit,
**      Return value points to the first non-white character of s, or to
**      the terminating NUL if the string holds nothing but white space.
**      All trailing white space is OVERWRITTEN with zero.
**
**  White space here means exactly ' ', '\t' and '\n' (see SPACE).
**  The SPACE macro is left defined after the function, as before.
*/
PUBLIC char * HTStrip ARGS1(
        char *,         s)
{
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n'))
    char * end = s;

    while (*end != '\0')
        end++;                  /* Find end of string */

    /*
    **  Walk back over trailing blanks.  The bound is tested BEFORE the
    **  pointer is moved, so no pointer to s[-1] is ever formed, even
    **  for an empty or all-blank string (doing so would be undefined).
    */
    while (end > s && SPACE(end[-1]))
        *--end = '\0';          /* Zap trailing blanks */

    while (SPACE(*s))
        s++;                    /* Strip leading blanks */

    return s;
}

/*      Scan a filename for its constituents.                   scan()
**      -------------------------------------
**
** On entry,
**      name    points to a document name which may be incomplete.
**              The buffer is cut up IN PLACE: separators are overwritten
**              with NUL, so it must be writable.
**      parts   receives the result; every member is reset first.
**
** On exit,
**      absolute or relative may be nonzero (but not both).
**      host, anchor and access may be nonzero if they were specified.
**      search is only set for "//host?query" with no '/' after the host.
**      Any which are nonzero point to zero terminated strings inside name.
*/
PRIVATE void scan ARGS2(
        char *,                 name,
        struct struct_parts *,  parts)
{
    char * after_access;
    char * p;

    parts->access = NULL;
    parts->host = NULL;
    parts->absolute = NULL;
    parts->relative = NULL;
    parts->search = NULL;       /* normally not used - kw */
    parts->anchor = NULL;

    /*
    **  Scan left-to-right for a scheme (access).
    **  A '/', '#', ';' or '?' seen before any ':' means there is none.
    */
    after_access = name;
    for (p = name; *p; p++) {
        if (*p==':') {
            *p = '\0';
            parts->access = name;       /* Access name has been specified */
            after_access = (p + 1);
            break;
        }
        if (*p == '/' || *p == '#' || *p == ';' || *p == '?')
            break;
    }

    /*
    **  Scan left-to-right for a fragment (anchor).
    */
    for (p = after_access; *p; p++) {
        if (*p =='#') {
            parts->anchor = (p + 1);
            *p = '\0';                  /* terminate the rest */
            break;              /* leave things after first # alone - kw */
        }
    }

    /*
    **  Scan left-to-right for a host or absolute path.
    */
    p = after_access;
    if (*p == '/') {
        if (p[1] == '/') {
            parts->host = (p + 2);        /* host has been specified    */
            *p = '\0';                    /* Terminate access           */
            p = strchr(parts->host, '/'); /* look for end of host name if any */
            if (p != NULL) {
                *p = '\0';                      /* Terminate host */
                parts->absolute = (p + 1);      /* Root has been found */
            } else {
                p = strchr(parts->host, '?');
                if (p != NULL) {
                    *p = '\0';                  /* Terminate host */
                    parts->search = (p + 1);
                }
            }
        } else {
            parts->absolute = (p + 1);          /* Root found but no host */
        }
    } else {
        parts->relative = (*after_access) ?
                             after_access : NULL; /* NULL for "" */
    }

    /*
    **  Check schemes that commonly have unescaped hashes.
    **
    **  The strchr() test is a cheap first-letter filter for the schemes
    **  compared below.  Note that strchr() also matches the terminating
    **  NUL, so an empty access string passes the filter as well.
    */
    if (parts->access && parts->anchor &&               /* optimize */
        strchr("lnsdLNSD", *parts->access) != NULL) {
        if ((!parts->host && strcasecomp(parts->access, "lynxcgi")) ||
            !strcasecomp(parts->access, "nntp") ||
            !strcasecomp(parts->access, "snews") ||
            !strcasecomp(parts->access, "news") ||
            !strcasecomp(parts->access, "data")) {
            /*
             *  Access specified but no host and not a lynxcgi URL, so the
             *  anchor may not really be one, e.g., news:j462#36487@foo.bar,
             *  or it's an nntp or snews URL, or news URL with a host.
             *  Restore the '#' in the address.
             */
            /* but only if we have found a path component of which this will
             * become part. - kw  */
            if (parts->relative || parts->absolute) {
                *(parts->anchor - 1) = '#';
                parts->anchor = NULL;
            }
        }
    }
} /*scan */

#if defined(HAVE_ALLOCA) && !defined(LY_FIND_LEAKS)
#define LYalloca(x)        alloca(x)
#define LYalloca_free(x)   {}
#else
#define LYalloca(x)        malloc(x)
#define LYalloca_free(x)   free(x)
#endif

/*      Parse a Name relative to another name.                  HTParse()
**      --------------------------------------
**
**      This returns those parts of a name which are given (and requested)
**      substituting bits from the related name where necessary.
**
** On entry,
**      aName           A filename given
**      relatedName     A name relative to which aName is to be parsed
**      wanted          A mask for the bits which are wanted.
**
** On exit,
**     returns         A pointer to a malloc'd string which MUST BE FREED
*/
PUBLIC char * HTParse ARGS3(
        CONST char *,   aName,
        CONST char *,   relatedName,
        int,            wanted)
{
    char * result = NULL;
    char * tail = NULL;  /* a pointer to the end of the 'result' string */
    char * return_value = NULL;
    int len, len1, len2;
    char * name = NULL;
    char * rel = NULL;
    char * p;
    char * acc_method;
    struct struct_parts given, related;

    CTRACE((tfp, "HTParse: aName:`%s'\n", aName));
    CTRACE((tfp, "   relatedName:`%s'\n", relatedName));

    if (wanted & (PARSE_STRICTPATH | PARSE_QUERY)) { /* if detail wanted... */
        if ((wanted & (PARSE_STRICTPATH | PARSE_QUERY))
            == (PARSE_STRICTPATH | PARSE_QUERY)) /* if strictpath AND query */
            wanted |= PARSE_PATH; /* then treat as if PARSE_PATH wanted */
        if (wanted & PARSE_PATH) /* if PARSE_PATH wanted */
            wanted &= ~(PARSE_STRICTPATH | PARSE_QUERY); /* ignore details */
    }
    CTRACE((tfp, "   want:%s%s%s%s%s%s%s\n",
            wanted & PARSE_PUNCTUATION ? " punc"   : "",
            wanted & PARSE_ANCHOR      ? " anchor" : "",
            wanted & PARSE_PATH        ? " path"   : "",
            wanted & PARSE_HOST        ? " host"   : "",
            wanted & PARSE_ACCESS      ? " access" : "",
            wanted & PARSE_STRICTPATH  ? 
" PATH"   : "",	    wanted & PARSE_QUERY       ? " QUERY"  : ""));    /*    ** Allocate the temporary string. Optimized.    */    len1 = strlen(aName) + 1;    len2 = strlen(relatedName) + 1;    len = len1 + len2 + 8;     /* Lots of space: more than enough */    result = tail = (char*)LYalloca(len * 2 + len1 + len2);    if (result == NULL) {	outofmem(__FILE__, "HTParse");    }    *result = '\0';    name = result + len;    rel = name + len1;    /*    **	Make working copy of the input string to cut up.    */    memcpy(name, aName, len1);    /*    **	Cut up the string into URL fields.    */    scan(name, &given);    /*    **	Now related string.    */    if ((given.access && given.host && given.absolute) || !*relatedName) {	/*	**  Inherit nothing!	*/	related.access = NULL;	related.host = NULL;	related.absolute = NULL;	related.relative = NULL;	related.search = NULL;	related.anchor = NULL;    } else {	memcpy(rel, relatedName, len2);	scan(rel,  &related);    }    /*    **	Handle the scheme (access) field.    */    if (given.access && given.host && !given.relative && !given.absolute) {	if (!strcmp(given.access, "http") ||	    !strcmp(given.access, "https") ||	    !strcmp(given.access, "ftp"))	    /*	    **	Assume root.	    */	    given.absolute = "";    }    acc_method = given.access ? given.access : related.access;    if (wanted & PARSE_ACCESS) {	if (acc_method) {	    strcpy(tail, acc_method);	    tail += strlen(tail);	    if (wanted & PARSE_PUNCTUATION) {		*tail++ = ':';		*tail = '\0';	    }	}    }    /*    **	If different schemes, inherit nothing.    **    **	We'll try complying with RFC 1808 and    **	the Fielding draft, and inherit nothing    **	if both schemes are given, rather than    **	only when they differ, except for    **	file URLs - FM    **    **	After trying it for a while, it's still    **	premature, IHMO, to go along with it, so    **	this is back to inheriting for identical    **	schemes whether or not they are "file".    
**	If you want to try it again yourself,    **	uncomment the strcasecomp() below. - FM    */    if ((given.access && related.access) &&	(/* strcasecomp(given.access, "file") || */	 strcmp(given.access, related.access))) {	related.host = NULL;	related.absolute = NULL;	related.relative = NULL;	related.search = NULL;	related.anchor = NULL;    }    /*    **	Handle the host field.    */    if (wanted & PARSE_HOST) {	if (given.host || related.host) {	    if (wanted & PARSE_PUNCTUATION) {		*tail++ = '/';		*tail++ = '/';	    }	    strcpy(tail, given.host ? given.host : related.host);#define CLEAN_URLS#ifdef CLEAN_URLS	    /*	    **	Ignore default port numbers, and trailing dots on FQDNs,	    **	which will only cause identical addresses to look different.	    **  (related is already a clean url).	    */	    {		char *p2, *h;		if ((p2 = strchr(result, '@')) != NULL)		   tail = (p2 + 1);		p2 = strchr(tail, ':');		if (p2 != NULL && !isdigit(UCH(p2[1])))		    /*		    **	Colon not followed by a port number.		    */		    *p2 = '\0';		if (p2 != NULL && *p2 != '\0' && acc_method != NULL) {		    /*		    **	Port specified.		    
*/		    if ((!strcmp(acc_method, "http"	 ) && !strcmp(p2, ":80" )) ||			(!strcmp(acc_method, "https"	 ) && !strcmp(p2, ":443")) ||			(!strcmp(acc_method, "gopher"	 ) && !strcmp(p2, ":70" )) ||			(!strcmp(acc_method, "ftp"	 ) && !strcmp(p2, ":21" )) ||			(!strcmp(acc_method, "wais"	 ) && !strcmp(p2, ":210")) ||			(!strcmp(acc_method, "nntp"	 ) && !strcmp(p2, ":119")) ||			(!strcmp(acc_method, "news"	 ) && !strcmp(p2, ":119")) ||			(!strcmp(acc_method, "newspost"  ) && !strcmp(p2, ":119")) ||			(!strcmp(acc_method, "newsreply" ) && !strcmp(p2, ":119")) ||			(!strcmp(acc_method, "snews"	 ) && !strcmp(p2, ":563")) ||			(!strcmp(acc_method, "snewspost" ) && !strcmp(p2, ":563")) ||			(!strcmp(acc_method, "snewsreply") && !strcmp(p2, ":563")) ||			(!strcmp(acc_method, "finger"	 ) && !strcmp(p2, ":79" )) ||			(!strcmp(acc_method, "telnet"	 ) && !strcmp(p2, ":23" )) ||			(!strcmp(acc_method, "tn3270"	 ) && !strcmp(p2, ":23" )) ||			(!strcmp(acc_method, "rlogin"	 ) && !strcmp(p2, ":513")) ||			(!strcmp(acc_method, "cso"	 ) && !strcmp(p2, ":105")))		    *p2 = '\0'; /* It is the default: ignore it */		}		if (p2 == NULL) {		    int len3 = strlen(tail);		    if (len3 > 0) {			h = tail + len3 - 1;	/* last char of hostname */			if (*h == '.')			    *h = '\0';		/* chop final . */		    }		} else if (p2 != result) {		    h = p2;		    h--;		/* End of hostname */		    if (*h == '.') {			/*			**  Slide p2 over h.			*/			while (*p2 != '\0')			    *h++ = *p2++;			*h = '\0';	/* terminate */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -