📄 urlutils.cxx
字号:
/* * * C++ Portable Types Library (PTypes) * Version 2.0.2 Released 17-May-2004 * * Copyright (C) 2001-2004 Hovik Melikyan * * http://www.melikyan.com/ptypes/ * */#include <stdlib.h>#include <ptypes.h>#include <pinet.h>#include "urlutils.h"USING_PTYPESchar* opt_anonymous_username = "ftp";char* opt_anonymous_password = "unknown@";char* opt_default_urlscheme = "http";static cset schemechars("0-9A-Za-z~-.");// TODO: make these sets RFC2396-compliantstatic cset unsafechars("~00-~20%:/@?#;\\<>+\"'~7F-~FF");static cset unsafepathchars = unsafechars - '/';static cset unsafeparamchars = unsafechars;static cset pathtermchars("~00#?;");bool isurl(const string& s){ const char* p = pconst(s); const char* b = p; while (*p & schemechars) p++; return p > b && *p == ':';}static string urlencode(const string& s, const cset& unsafe){ static const char hexchars[17] = "0123456789ABCDEF"; int numunsafe = 0; const char* p; for (p = s; *p != 0; p++) if (*p & unsafe) numunsafe++; if (numunsafe == 0) return s; string ret; setlength(ret, length(s) + numunsafe * 2); p = s; char* d = unique(ret); for (; *p != 0; p++, d++) { if (*p & unsafe) { *d++ = '%'; *d++ = hexchars[*p >> 4]; *d = hexchars[*p & 0x0f]; } else *d = *p; } return ret;}string urlencodepath(const string& path){ return urlencode(path, unsafepathchars);}static int urldefport(const string& scheme){ if (scheme == "http") return 80; else if (scheme == "https") return 443; else if (scheme == "ftp") return 21; else return 0;}urlrec::urlrec() : scheme(), username(), password(), pwdset(false), host(), port(0), path(), proto(), query(), fragment() {}void urlclear(urlrec& u){ clear(u.scheme); clear(u.username); clear(u.password); u.pwdset = false; clear(u.host); u.port = 0; clear(u.path); clear(u.proto); clear(u.query); clear(u.fragment);}// urlcreate:// NOTES: if scheme is empty, opt_default_urlscheme is taken; for ftp scheme // leading '/' in the path component is significant; for all other schemes // the leading '/' is ignored.string urlcreate(const urlrec& u){ string ret; string s, p; // scheme if (isempty(u.scheme)) s = opt_default_urlscheme; else s = lowercase(u.scheme); if (s[length(s) - 1] == ':') setlength(s, length(s) - 1); ret = s + "://"; // username and password if (!isempty(u.username)) { ret += urlencode(u.username, unsafechars); if (u.pwdset) ret += ':' + urlencode(u.password, unsafechars); ret += '@'; } // host name and port number ret += u.host; if (u.port != 0 && u.port != urldefport(s)) ret += ':' + itostring(u.port); // path ret += '/'; p = u.path; if (!isempty(p) && p[0] == '/') { if (s == "ftp") ret += "%2F"; del(p, 0, 1); } ret += urlencode(p, unsafepathchars); // params if (!isempty(u.proto)) ret += ';' + urlencode(u.proto, unsafeparamchars); if (!isempty(u.query)) ret += '?' + urlencode(u.query, unsafeparamchars); if (!isempty(u.fragment)) ret += '#' + urlencode(u.fragment, unsafeparamchars); return ret;}// urlcrack:// Note: no "friendly" URL's! If the URL is invalid the result is undefined.static int xchartoint(char c){ if (c >= 'a') return c - 'a' + 10; else if (c >= 'A') return c - 'A' + 10; else return c - '0';}static void assignurlstr(string& s, const char* p, const char* end, bool decodeplus = false){ if (p >= end) clear(s); else { setlength(s, end - p); char* d = unique(s); for (; p < end; p++, d++) { if (*p == '+' && decodeplus) *d = ' '; else if (*p == '%') { if (++p == end) break; *d = char(xchartoint(*p) << 4); if (++p == end) break; *d |= char(xchartoint(*p)); } else *d = *p; } setlength(s, d - pconst(s)); }}static void crackpath(const char* p, urlrec& u){ const char* end = p; while (!(*end & pathtermchars)) // [#0, '?', '#', ';'] end++; assignurlstr(u.path, p, end); // leading '/' if (u.scheme == "http" || u.scheme == "https") { if (isempty(u.path)) u.path = '/'; else if (*pconst(u.path) != '/' && *pconst(u.path) != '~') ins('/', u.path, 0); } // parameters while (*end != 0) { char paramtype = *end; p = ++end; while (!(*end & pathtermchars)) end++; switch(paramtype) { case ';': assignurlstr(u.proto, p, end); break; case '?': assignurlstr(u.query, p, end, true); break; case '#': assignurlstr(u.fragment, p, end); break; } }}void urlcrack(const string& s, urlrec& u){ urlclear(u); const char* t; const char* p = s; const char* end = p; // scheme while (*end & schemechars) end++; if (*end != ':' || *(end + 1) != '/' || *(end + 2) != '/') return; // invalid scheme: we don't want to generate errors assignurlstr(u.scheme, p, end); u.scheme = lowercase(u.scheme); end += 3; // skip '://' // hostname and possibly username:password p = end; while (*end != 0 && *end != '/' && *end != '@') end++; // username and possibly password if (*end == '@') { t = p; while (*t != ':' && *t != '@') t++; // password if (*t == ':') { assignurlstr(u.password, t + 1, end); u.pwdset = true; } // username assignurlstr(u.username, p, t); end++; p = end; while (*end != 0 && *end != '/') end++; } else if (u.scheme == "ftp") { u.username = opt_anonymous_username; u.password = opt_anonymous_password; u.pwdset = true; } // hostname and possibly port number t = p; while (*t != 0 && *t != ':' && *t != '/') t++; assign(u.host, p, t - p); // port if (*t == ':') { string s; assign(s, t + 1, end - t - 1); u.port = atoi(s); } // path if (*end == '/') end++; crackpath(end, u);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -