📄 uri.cc
字号:
#include "Uri.h"

#include "wincstring.h"
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <strstream>
#include <cassert>
#include "tld.h"

//#define DEBUG
#include "debug.h"

using namespace std;
using namespace htmlcxx;

/** Structure to store various schemes and their default ports */
struct schemes_t {
    /** The name of the scheme */
    const char *name;
    /** The default port for the scheme */
    unsigned int default_port;
};

/* Some WWW schemes and their default ports; this is basically /etc/services */
/* This will become global when the protocol abstraction comes */
/* As the schemes are searched by a linear search, */
/* they are sorted by their expected frequency */
static schemes_t schemes[] =
{
    {"http",     Uri::URI_HTTP_DEFAULT_PORT},
    {"ftp",      Uri::URI_FTP_DEFAULT_PORT},
    {"https",    Uri::URI_HTTPS_DEFAULT_PORT},
    {"gopher",   Uri::URI_GOPHER_DEFAULT_PORT},
    {"ldap",     Uri::URI_LDAP_DEFAULT_PORT},
    {"nntp",     Uri::URI_NNTP_DEFAULT_PORT},
    {"snews",    Uri::URI_SNEWS_DEFAULT_PORT},
    {"imap",     Uri::URI_IMAP_DEFAULT_PORT},
    {"pop",      Uri::URI_POP_DEFAULT_PORT},
    {"sip",      Uri::URI_SIP_DEFAULT_PORT},
    {"rtsp",     Uri::URI_RTSP_DEFAULT_PORT},
    {"wais",     Uri::URI_WAIS_DEFAULT_PORT},
    {"z39.50r",  Uri::URI_WAIS_DEFAULT_PORT},
    {"z39.50s",  Uri::URI_WAIS_DEFAULT_PORT},
    {"prospero", Uri::URI_PROSPERO_DEFAULT_PORT},
    {"nfs",      Uri::URI_NFS_DEFAULT_PORT},
    {"tip",      Uri::URI_TIP_DEFAULT_PORT},
    {"acap",     Uri::URI_ACAP_DEFAULT_PORT},
    {"telnet",   Uri::URI_TELNET_DEFAULT_PORT},
    {"ssh",      Uri::URI_SSH_DEFAULT_PORT},
    { NULL, 0xFFFF } /* unknown port; the NULL name terminates the table */
};

/**
 * Look up the default port of a scheme.
 *
 * The comparison against the table is case-insensitive.
 *
 * @param scheme_str scheme name (may be NULL)
 * @return the default port of the matching table entry, or 0 when
 *         scheme_str is NULL or no entry matches
 */
static unsigned int port_of_Scheme(const char *scheme_str)
{
    schemes_t *scheme;

    if (scheme_str) {
        for (scheme = schemes; scheme->name != NULL; ++scheme) {
            if (strcasecmp(scheme_str, scheme->name) == 0) {
                return scheme->default_port;
            }
        }
    }
    return 0;
}

/* We have a table that we can index by character and it tells us if the
 * character is one of the interesting delimiters. Note that we even get
 * compares for NUL for free -- it's just another delimiter.
 */
#define T_COLON    0x01 /* ':' */
#define T_SLASH    0x02 /* '/' */
#define T_QUESTION 0x04 /* '?' */
#define T_HASH     0x08 /* '#' */
#define T_NUL      0x80 /* '\0' */

/* the uri_delims.h file is autogenerated by gen_uri_delims.c */
/* this file is automatically generated by gen_uri_delims, do not edit */
static const unsigned char uri_delims[256] = {
    T_NUL,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,T_HASH,0,0,0,0,
    0,0,0,0,0,0,0,T_SLASH,0,0,0,0,0,0,0,0,0,0,T_COLON,0,
    0,0,0,T_QUESTION,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* it works like this:
    if (uri_delims[ch] & NOTEND_foobar) {
        then we're not at a delimiter for foobar
    }
*/

/* Note that we optimize the scheme scanning here, we cheat and let the
 * compiler know that it doesn't have to do the & masking.
 */
#define NOTEND_SCHEME   (0xff)
#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL)
#define NOTEND_PATH     (T_QUESTION | T_HASH | T_NUL)

static size_t wwwPrefixOffset(const std::string& hostname);

/** Construct an empty Uri: all components empty, port 0. */
Uri::Uri()
    : mScheme(), mUser(), mPassword(), mHostname(), mPath(), mQuery(),
      mFragment(), mExistsQuery(false), mExistsFragment(false), mPort(0)
{}

/**
 * Construct a Uri by parsing uri_str.
 *
 * @param uri_str text to parse; see init() for the accepted forms
 * @throws Exception propagated from init() on a malformed port
 */
Uri::Uri(const string &uri_str)
    : mScheme(), mUser(), mPassword(), mHostname(), mPath(), mQuery(),
      mFragment(), mExistsQuery(false), mExistsFragment(false), mPort(0)
{
    init(uri_str);
}

/**
 * Parse uri_str into scheme, user, password, hostname, port, path, query
 * and fragment.
 *
 * An empty string leaves the object untouched. Anything that does not start
 * with "<scheme>://" is treated as a path (optionally followed by ?query
 * and/or #fragment). Components that are absent from the input are not
 * assigned, so they keep whatever value they had before the call.
 *
 * Port handling: when the host part contains no ':', mPort is not assigned.
 * When it contains ':' followed by nothing, mPort is set to the scheme's
 * default port (0 for unknown schemes). Otherwise the text after ':' is
 * converted with strtol().
 *
 * @param uri_str text to parse
 * @throws Exception if the port text has trailing characters that strtol()
 *         does not consume
 */
void Uri::init(const string &uri_str)
{
    DEBUGP("Parsing uri %s\n", uri_str.c_str());

    if (uri_str.empty()) return;

    /* All locals are declared up front without initializers because the
     * gotos below jump across this function in both directions. */
    const char *uri = uri_str.c_str();
    const char *s;
    const char *s1;
    const char *hostinfo;
    char *endstr;

    /* We assume the processor has a branch predictor like most --
     * it assumes forward branches are untaken and backwards are taken. That's
     * the reason for the gotos. -djg
     */
    if (uri[0] == '/') {
deal_with_path:
        DEBUGP("Dealing with path\n");
        /* we expect uri to point to first character of path ... remember
         * that the path could be empty -- http://foobar?query for example
         */
        s = uri;
        while ((uri_delims[static_cast<unsigned char>(*s)] & NOTEND_PATH) == 0) {
            ++s;
        }
        if (s != uri) {
            mPath.assign(uri, s - uri);
            DEBUGP("Path is %s\n", mPath.c_str());
        }
        if (*s == 0) {
            return;
        }
        if (*s == '?') {
            ++s;
            s1 = strchr(s, '#');
            if (s1) {
                /* query runs up to '#', fragment is everything after it */
                mFragment.assign(s1 + 1);
                mExistsFragment = true;
                DEBUGP("Fragment is %s\n", mFragment.c_str());
                mQuery.assign(s, s1 - s);
                mExistsQuery = true;
                DEBUGP("Query is %s\n", mQuery.c_str());
            }
            else {
                mQuery.assign(s);
                mExistsQuery = true;
                DEBUGP("Query is %s\n", mQuery.c_str());
            }
            return;
        }
        /* otherwise it's a fragment */
        mFragment.assign(s + 1);
        mExistsFragment = true;
        DEBUGP("Fragment is %s\n", mFragment.c_str());
        return;
    }

    DEBUGP("Dealing with scheme\n");
    /* find the scheme: */
    /* The <cctype> classifiers require a value representable as unsigned
     * char; passing a plain (possibly negative) char is undefined behaviour
     * for bytes >= 0x80, so convert first. */
    if (!isalpha(static_cast<unsigned char>(*uri))) goto deal_with_path;
    s = uri;
    while ((uri_delims[static_cast<unsigned char>(*s)] & NOTEND_SCHEME) == 0) {
        ++s;
    }
    /* scheme must be non-empty and followed by :// */
    if (s == uri || s[0] != ':' || s[1] != '/' || s[2] != '/') {
        goto deal_with_path; /* backwards predicted taken! */
    }

    mScheme.assign(uri, s - uri);
    DEBUGP("Scheme is %s\n", mScheme.c_str());
    s += 3;

    DEBUGP("Finding hostinfo\n");
    hostinfo = s;
    DEBUGP("Hostinfo is %s\n", hostinfo);
    while ((uri_delims[static_cast<unsigned char>(*s)] & NOTEND_HOSTINFO) == 0) {
        ++s;
    }
    uri = s; /* whatever follows hostinfo is start of uri */
//  mHostinfo.assign(hostinfo, uri - hostinfo);

    /* If there's a username:password@host:port, the @ we want is the last @...
     * too bad there's no memrchr()... For the C purists, note that hostinfo
     * is definitely not the first character of the original uri so therefore
     * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
     */
    do {
        --s;
    } while (s >= hostinfo && *s != '@');

    if (s < hostinfo) {
        /* again we want the common case to be fall through */
deal_with_host:
        DEBUGP("Dealing with host\n");
        /* We expect hostinfo to point to the first character of
         * the hostname. If there's a port it is the first colon.
         */
        s = static_cast<const char *>(memchr(hostinfo, ':', uri - hostinfo));
        if (s == NULL) {
            /* we expect the common case to have no port */
            mHostname.assign(hostinfo, uri - hostinfo);
            DEBUGP("Hostname is %s\n", mHostname.c_str());
            goto deal_with_path;
        }
        mHostname.assign(hostinfo, s - hostinfo);
        DEBUGP("Hostname is %s\n", mHostname.c_str());
        ++s;
        if (uri != s) {
            /* non-empty port text: convert it and insist that strtol()
             * consumed all of it */
            mPortStr.assign(s, uri - s);
            mPort = strtol(mPortStr.c_str(), &endstr, 10);
            if (*endstr == '\0') {
                goto deal_with_path;
            }
            /* Invalid characters after ':' found */
            DEBUGP("Throwing invalid url exception\n");
            throw Exception("Invalid character after ':'");
        }
        /* "host:" with nothing after the colon: use the scheme default */
        this->mPort = port_of_Scheme(mScheme.c_str());
        goto deal_with_path;
    }

    /* s points at the last '@': everything before it is user[:password] */
    /* first colon delimits username:password */
    s1 = static_cast<const char *>(memchr(hostinfo, ':', s - hostinfo));
    if (s1) {
        mUser.assign(hostinfo, s1 - hostinfo);
        ++s1;
        mPassword.assign(s1, s - s1);
    }
    else {
        mUser.assign(hostinfo, s - hostinfo);
    }
    hostinfo = s + 1;
    goto deal_with_host;
}

Uri::~Uri() {
}

/** @return a copy of the scheme component */
string Uri::scheme() const { return mScheme; }

/** Replace the scheme component. */
void Uri::scheme(string scheme) {
    mScheme = scheme;
}

/** @return a copy of the user component */
string Uri::user() const { return mUser; }

/** Replace the user component. */
void Uri::user(string user) {
    mUser = user;
}

/** @return a copy of the password component */
string Uri::password() const { return mPassword; }

/** Replace the password component. */
void Uri::password(string password) {
    mPassword = password;
}

/** @return a copy of the hostname component */
string Uri::hostname() const { return mHostname; }

/** Replace the hostname component. */
void Uri::hostname(string hostname) {
    mHostname = hostname;
}

/** @return a copy of the path component */
string Uri::path() const { return mPath; }

/** Replace the path component. */
void Uri::path(string path) {
    mPath = path;
}

/** @return the stored fragment flag (set by init() when it finds a '#' part) */
bool Uri::existsFragment() const { return mExistsFragment; }

/** Set the fragment flag. */
void Uri::existsFragment(bool existsFragment) {
    mExistsFragment = existsFragment;
}

/** @return the stored query flag (set by init() when it finds a '?' part) */
bool Uri::existsQuery() const { return mExistsQuery; }

/** Set the query flag. */
void Uri::existsQuery(bool existsQuery) {
    mExistsQuery = existsQuery;
}

/** @return a copy of the query component */
string Uri::query() const { return mQuery; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -