📄 xstring.cpp
字号:
application/x-www-form-urlencoded媒体类型也是按照这种方式编码。由于历史原 因,这和RFC 1738编码(参考 rawurlencode())是不同的,空格在这里被编码成加 号(+)。这个函数通常用来把一个字符串编码作为URL查询的一部分,这样就可以把 变量传到下一个页面。 @code * xstring userinput; * // ... * cout << "<a href=\"mycgi?foo=" << userinput.urlencode() << "\">"; @endcode @see urldecode() rawurlencode() rawurldecode() */ xstring xstring::urlencode(void) const { static char hex_data[] = "0123456789ABCDEF"; size_type size = this->size(); xstring str; str.reserve(size * 3); // 保留足够的内存,以避免发生内存搬移 const_iterator pos; const_iterator end = this->end(); for (pos = this->begin(); pos < end; ++pos) { if (('0' <= *pos && *pos <= '9') || ('A' <= *pos && *pos <= 'Z') || ('a' <= *pos && *pos <= 'z') || ('-' == *pos) || ('_' == *pos) || ('.' == *pos)) { str += *pos; } else if (' ' == *pos) { str += '+'; } else { str += '%'; str += hex_data[(*pos >> 4) & 0x0F]; str += hex_data[*pos & 0x0F]; } } return str; } // xstring::urlencode() /** @return 将字符串中任何%##形式的编码解码,解码后的字符串被返回 @code xstring QUERY_STRING; // ... vector<xstring> a = QUERY_STRING.explode("&"); unsigned int i = 0; xstring b; while (i < a.size()) { b = a[i].getWord("="); cout << "Value for parameter " << b.urldecode().htmlspecialchars() << " is " << a[i].urldecode().htmlspecialchars() << "<br />\n"; ++i; } @endcode @see urlencode() rawurlencode() rawurldecode() */ xstring xstring::urldecode(void) const { size_type size = this->size(); xstring str; str.reserve(size); // 保留足够的内存,以避免发生内存搬移 char buf[2]; unsigned int i; const_iterator pos; const_iterator end = this->end(); for (pos = this->begin(); pos < end; ++pos) { if ('%' == *pos) { if ((pos + 2 < end) && (('0' <= *(pos + 1) && *(pos + 1) <= '9') || ('A' <= *(pos + 1) && *(pos + 1) <= 'Z') || ('a' <= *(pos + 1) && *(pos + 1) <= 'z')) && (('0' <= *(pos + 2) && *(pos + 2) <= '9') || ('A' <= *(pos + 2) && *(pos + 2) <= 'Z') || ('a' <= *(pos + 2) && *(pos + 2) <= 'z'))) { for (i = 0; i < 2; ++i) { buf[i] = *(++pos); if ('0' <= buf[i] && buf[i] <= '9') { buf[i] = buf[i] - '0'; } else if ('A' <= buf[i] && buf[i] <= 'Z') { buf[i] = buf[i] - 'A' + 10; } else // ('a' <= buf[i] && buf[i] <= 'z') { buf[i] = buf[i] - 'a' + 10; } } str += ((buf[0] << 4) & 0xF0) | (buf[1] & 0x0F); continue; } str += *pos; } else if ('+' == *pos) { str += ' '; } else { str += *pos; } } return str; } // xstring::urldecode()//---------------------------------------------------------------------------- /** @return 一个新的字符串,其中所有的非字母数字字符,除了-_.之外,都会被替换 成一个百分号(%)跟着两个十六进制数字。 这是RFC 1738上面描述的编码方式,用来保护文字内容不被特殊的URL分隔字符打断。 这也用来保护URL不被传输媒体上字符转换损坏。比如,你想要在一个FTP URL中包含 密码: @code * cout << "<a href=\"ftp://user:" << xstring("foo @+%/").rawurlencode() * << "@ftp.my.com/x.txt\">"; @endcode 或者,你通过URL中的PATH_INFO传送信息: @code * cout << "<a href=\"http://x.com/department_list_script/" * << xstring("sales and marketing/Miami").rawurlencode() << "\">"; @endcode @see urlencode() urldecode() rawurldecode() */ xstring xstring::rawurlencode(void) const { static char hex_data[] = "0123456789ABCDEF"; size_type size = this->size(); xstring str; str.reserve(size * 3); // 保留足够的内存,以避免发生内存搬移 const_iterator pos; const_iterator end = this->end(); for (pos = this->begin(); pos < end; ++pos) { if (('0' <= *pos && *pos <= '9') || ('A' <= *pos && *pos <= 'Z') || ('a' <= *pos && *pos <= 'z') || ('-' == *pos) || ('_' == *pos) || ('.' == *pos)) { str += *pos; } else { str += '%'; str += hex_data[(*pos >> 4) & 0x0F]; str += hex_data[*pos & 0x0F]; } } return str; } // xstring::rawurlencode() /** @return 一个新的字符串,其中原始字符串中的百分号(%)跟着两个十六进制数字 被解码成文字字符。比如,<tt>foo%20bar%40baz</tt>被解码成 <tt>foo bar@@baz</tt> @attention rawurldecode()不把加号(+)解码成空格。 urldecode()会处理加号。 @see urlencode() urldecode() rawurlencode() */ xstring xstring::rawurldecode(void) const { size_type size = this->size(); xstring str; str.reserve(size); // 保留足够的内存,以避免发生内存搬移 char buf[2]; unsigned int i; const_iterator pos; const_iterator end = this->end(); for (pos = this->begin(); pos < end; ++pos) { if ('%' == *pos) { if ((pos + 2 < end) && (('0' <= *(pos + 1) && *(pos + 1) <= '9') || ('A' <= *(pos + 1) && *(pos + 1) <= 'Z') || ('a' <= *(pos + 1) && *(pos + 1) <= 'z')) && (('0' <= *(pos + 2) && *(pos + 2) <= '9') || ('A' <= *(pos + 2) && *(pos + 2) <= 'Z') || ('a' <= *(pos + 2) && *(pos + 2) <= 'z'))) { for (i = 0; i < 2; ++i) { buf[i] = *(++pos); if ('0' <= buf[i] && buf[i] <= '9') { buf[i] = buf[i] - '0'; } else if ('A' <= buf[i] && buf[i] <= 'Z') { buf[i] = buf[i] - 'A' + 10; } else // ('a' <= buf[i] && buf[i] <= 'z') { buf[i] = buf[i] - 'a' + 10; } } str += ((buf[0] << 4) & 0xF0) | (buf[1] & 0x0F); continue; } str += *pos; } else { str += *pos; } } return str; } // xstring::rawurldecode()//---------------------------------------------------------------------------- /** base64_encode()返回base64编码完成的数据。这种编码方式是设计用来使二进制数 据安全通过只接受7-bit字节的传输层,比如说电子邮件的信件正文。 Base64编码过的数据比原始数据增加大约33%的数据量。 @code * xstring str = "This is an encoded string"; * cout << str.base64_encode(); @endcode 这将会输出 @code * VGhpcyBpcyBhbiBlbmNvZGVkIHN0cmluZw== @endcode @see base64_decode() chunk_split() <a href="http://www.faqs.org/rfcs/rfc2045">RFC 2045</a> section 6.8 */ xstring xstring::base64_encode(void) const { static const unsigned char dtable[64] = { 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F }; int i, hiteof = 0; size_type offset = 0; int olen; olen = 0; xstring str; const char* source = this->data(); size_type length = this->size(); str.reserve((length / 3 + 1) * 4); // 保留足够的内存 while (!hiteof) { unsigned char igroup[3], ogroup[4]; int c, n; igroup[0] = igroup[1] = igroup[2] = 0; for (n = 0; n < 3; n++) { c = *(source++); offset++; if (offset > length) { hiteof = 1; break; } igroup[n] = (unsigned char)c; } if (n > 0) { ogroup[0] = dtable[igroup[0] >> 2]; ogroup[1] = dtable[((igroup[0] & 3) << 4) | (igroup[1] >> 4)]; ogroup[2] = dtable[((igroup[1] & 0xF) << 2) | (igroup[2] >> 6)]; ogroup[3] = dtable[igroup[2] & 0x3F]; /* Replace characters in output stream with "=" pad characters if fewer than three characters were read from the end of the input stream. */ if (n < 3) { ogroup[3] = '='; if (n < 2) { ogroup[2] = '='; } } for (i = 0; i < 4; i++) { str += ogroup[i]; } } } return str; } // xstring::base64_encode() /** base64_decode()解码编码过的数据并返回原始数据。返回的数据有可能是二进制的。 @code * xstring str = "VGhpcyBpcyBhbiBlbmNvZGVkIHN0cmluZw=="; * cout << str.base64_decode(); @endcode 这将会输出 @code * This is an encoded string @endcode @see base64_encode() <a href="http://www.faqs.org/rfcs/rfc2045">RFC 2045</a> section 6.8 */ xstring xstring::base64_decode(void) const { static const unsigned dtable[128] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3F, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00 }; int i; size_type offset = 0; int endoffile; int count; endoffile = 0; xstring str; const char* source = this->data(); size_type length = this->size(); str.reserve(length * 3 / 4); // 保留足够的内存 /*CONSTANTCONDITION*/ while (1) { unsigned char a[4], b[4], o[3]; for (i = 0; i < 4; i++) { int c; while (1) { c = *(source++); offset++; if (offset > length) endoffile = 1; if (isspace(c) || c == '\n' || c == '\r') continue; break; } if (endoffile) { return str; } if (dtable[c] & 0x80) { i--; continue; } a[i] = (unsigned char)c; b[i] = (unsigned char)dtable[c]; } o[0] = (b[0] << 2) | (b[1] >> 4); o[1] = (b[1] << 4) | (b[2] >> 2); o[2] = (b[2] << 6) | b[3]; i = a[2] == '=' ? 1 : (a[3] == '=' ? 2 : 3); count = 0; while (count < i) { str += o[count++]; } if (i < 3) { return str; } } return str; } // xstring::base64_decode()//---------------------------------------------------------------------------- /** 这个函数用来把一个字符串分割成小块。比如说按照RFC 2045的方式转换 base64_encode的输出。end(默认值是"@\r@\n")被插入到每chunklen个字符后面。 @return 新字符串的拷贝,不改变原始字符串。 @code * xstring data; * // ... * // format data using RFC 2045 semantics * xstring new_string = data.base64_encode().chunk_split(); @endcode @see explode() split() <a href="http://www.faqs.org/rfcs/rfc2045">RFC 2045</a> section 6.8 */ xstring xstring::chunk_split( unsigned int chunklen, const xstring& end ) const {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -