⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uri.cc

📁 著名的标准C++的html解析器
💻 CC
📖 第 1 页 / 共 2 页
字号:
void Uri::query(string query) {	mQuery = query;}string Uri::fragment() const { return mFragment; }void Uri::fragment(string fragment) {	mFragment = fragment;}unsigned int Uri::port() const { return mPort; }void Uri::port(unsigned int port) { mPort = port; }static const char *default_filenames[] = { "index", "default", NULL };static const char *default_extensions[] = { ".html", ".htm", ".php", ".shtml", ".asp", ".cgi", NULL };static unsigned short default_port_for_scheme(const char *scheme_str){	schemes_t *scheme;	if (scheme_str == NULL)		return 0;	for (scheme = schemes; scheme->name != NULL; ++scheme)		if (strcasecmp(scheme_str, scheme->name) == 0)			return scheme->default_port;	return 0;}Uri Uri::absolute(const Uri &base) const{	if (mScheme.empty())	{		Uri root(base);		if (root.mPath.empty()) root.mPath = "/";		if (mPath.empty())		{			if (mExistsQuery)			{				root.mQuery = mQuery;				root.mExistsQuery = mExistsQuery;				root.mFragment = mFragment;				root.mExistsFragment = mExistsFragment;			}			else if (mExistsFragment)			{				root.mFragment = mFragment;				root.mExistsFragment = mExistsFragment;			}		}		else if (mPath[0] == '/')		{			root.mPath = mPath;			root.mQuery = mQuery;			root.mExistsQuery = mExistsQuery;			root.mFragment = mFragment;			root.mExistsFragment = mExistsFragment;		}		else		{			string path(root.mPath);			string::size_type find;			find = path.rfind("/");			if (find != string::npos) path.erase(find+1);			path += mPath;			root.mPath = path;			root.mQuery = mQuery;			root.mExistsQuery = mExistsQuery;			root.mFragment = mFragment;			root.mExistsFragment = mExistsFragment;		}		return root;	}	if (mPath.empty())	{		Uri root(*this);		root.mPath = "/";		return root;	}	return *this;}string Uri::unparse(int flags ) const{	string ret;	ret.reserve(mScheme.length() + mUser.length() + mPassword.length() + mHostname.length() + mPath.length() + mQuery.length() + mFragment.length() + mPortStr.length());	DEBUGP("Unparsing scheme\n");	if(!(Uri::REMOVE_SCHEME & flags)) {		if(!mScheme.empty()) {			ret +=  mScheme;			ret += "://";		}	}	DEBUGP("Unparsing hostname\n");	if(!mHostname.empty()) { 		size_t offset = 0;		if(flags & Uri::REMOVE_WWW_PREFIX && mHostname.length() > 3) {			offset = wwwPrefixOffset(mHostname);		}		ret += (mHostname.c_str() + offset);	}	DEBUGP("Unparsing port\n");	if (!mPortStr.empty() && !(!mScheme.empty() && mPort == default_port_for_scheme(mScheme.c_str())))	{		ret += ':';		ret += mPortStr;	}	DEBUGP("Unparsing path\n");	if(!mPath.empty()) 	{		char *buf = new char[mPath.length() + 1];		memcpy(buf, mPath.c_str(), mPath.length() + 1);		if(flags & Uri::REMOVE_DEFAULT_FILENAMES) {			const char **ptr = default_extensions;			char *end = buf + mPath.length();			size_t offset = 0;			while(*ptr != NULL) {				size_t len = strlen(*ptr);				if((strcmp(end - len, *ptr)) == 0) {					offset = len;					break;				}				++ptr;			} 			if(offset == 0) goto remove_bar;			ptr = default_filenames;			bool found = false;			while(*ptr != NULL) {				size_t len = strlen(*ptr);				if(strncmp(end - offset - len, *ptr, len) == 0) {					offset += len; 					found = true;					break;				}				++ptr;			}			if(found) {				*(end - offset) = 0; //cut filename			}		}		remove_bar:		if(flags & Uri::REMOVE_TRAILING_BAR) {			if(strlen(buf) > 1 && buf[strlen(buf) - 1] == '/') { //do not remove if path is only the bar				buf[strlen(buf) - 1] = 0;			}		} 		ret += buf;		delete [] buf;	}	DEBUGP("Unparsing query\n");	if(!(flags & Uri::REMOVE_QUERY) && mExistsQuery) {		ret += '?';		if(flags & Uri::REMOVE_QUERY_VALUES) {			const char *ptr = mQuery.c_str();			bool inside = false;			while(*ptr) {				if(*ptr == '=') {					inside = true;				}				if(*ptr == '&') {					inside = false;				}				if(inside) {					++ptr;				} else {					ret += *ptr;					++ptr;				}			}		} else {			ret += mQuery;		}	}	DEBUGP("Unparsing fragment\n");	if(!(flags & Uri::REMOVE_FRAGMENT) && mExistsFragment)	{		ret += '#';		ret += mFragment;	}	return ret;}static size_t wwwPrefixOffset(const std::string& hostname) {	string::size_type len = hostname.length();	if(strncasecmp("www", hostname.c_str(), 3) == 0)	{		if(len > 3 && hostname[3] == '.') 		{			return 4;		}		if(len > 4 && isdigit(hostname[3]) && hostname[4] == '.')		{			return 5;		}	}	return 0;}	std::string Uri::canonicalHostname(unsigned int maxDepth) const{	size_t prefixOffset = wwwPrefixOffset(mHostname);	size_t suffixOffset = tldOffset(mHostname.c_str());	unsigned int depth = 0;	string::const_iterator canonicalStart = mHostname.begin() + prefixOffset;	string::const_iterator ptr = mHostname.begin();	ptr += mHostname.length() - suffixOffset;	while (depth < maxDepth && ptr > canonicalStart) 	{		--ptr;		if (*ptr == '.') ++depth;	}	if (*ptr == '.') ++ptr;	return string(ptr, mHostname.end());}std::string Uri::decode(const std::string &uri){    //Note from RFC1630:  "Sequences which start with a percent sign    //but are not followed by two hexadecimal characters (0-9,A-F) are reserved    //for future extension"	const unsigned char *ptr = (const unsigned char *)uri.c_str();	string ret;	ret.reserve(uri.length());	for (; *ptr; ++ptr)	{		if (*ptr == '%')		{			if (*(ptr + 1))			{				char a = *(ptr + 1);				char b = *(ptr + 2);				if (!((a >= 0x30 && a < 0x40) || (a >= 0x41 && a < 0x47))) continue;				if (!((b >= 0x30 && b < 0x40) || (b >= 0x41 && b < 0x47))) continue;				char buf[3];				buf[0] = a;				buf[1] = b;				buf[2] = 0;				ret += (char)strtoul(buf, NULL, 16);				ptr += 2;				continue;			}		}		ret += *ptr;	}	return ret;}//This vector is generated by safechars.py. Please do not edit by hand.static const char safe[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };std::string Uri::encode(const std::string &uri){	string ret;		const unsigned char *ptr = (const unsigned char *)uri.c_str();	ret.reserve(uri.length());	for (; *ptr ; ++ptr)	{		if (!safe[*ptr]) 		{			char buf[5];			memset(buf, 0, 5);			snprintf(buf, 5, "%%%X", (*ptr));			ret.append(buf); 			}		else 		{			ret += *ptr;		}	}	return ret;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -