📄 xstring.cpp

📁 经典的string 函数库学习资料
💻 CPP
📖 第 1 页 / 共 5 页
字号:
    application/x-www-form-urlencoded媒体类型也是按照这种方式编码。由于历史原    因，这和RFC 1738编码（参考 rawurlencode()）是不同的，空格在这里被编码成加    号（+）。这个函数通常用来把一个字符串编码作为URL查询的一部分，这样就可以把    变量传到下一个页面。    @code  * xstring userinput;  * // ...  * cout << "<a href=\"mycgi?foo=" << userinput.urlencode() << "\">";    @endcode    @see urldecode() rawurlencode() rawurldecode()     */    xstring xstring::urlencode(void) const    {        static char hex_data[] = "0123456789ABCDEF";        size_type size = this->size();        xstring str;        str.reserve(size * 3);  // 保留足够的内存，以避免发生内存搬移        const_iterator  pos;        const_iterator  end = this->end();        for (pos = this->begin(); pos < end; ++pos)        {            if (('0' <= *pos && *pos <= '9') ||                ('A' <= *pos && *pos <= 'Z') ||                ('a' <= *pos && *pos <= 'z') ||                ('-' == *pos) || ('_' == *pos) || ('.' == *pos))            {               str += *pos;            }            else if (' ' == *pos)            {               str += '+';            }            else            {                str += '%';                str += hex_data[(*pos >> 4) & 0x0F];                str += hex_data[*pos & 0x0F];            }        }        return str;    }   // xstring::urlencode()    /**    @return 将字符串中任何%##形式的编码解码，解码后的字符串被返回    @code    xstring QUERY_STRING;    // ...    vector<xstring> a = QUERY_STRING.explode("&");    unsigned int i = 0;    xstring b;    while (i < a.size())    {        b = a[i].getWord("=");        cout << "Value for parameter " << b.urldecode().htmlspecialchars()             << " is " << a[i].urldecode().htmlspecialchars() << "<br />\n";        ++i;    }    @endcode    @see urlencode() rawurlencode() rawurldecode()     */    xstring xstring::urldecode(void) const    {        size_type size = this->size();        xstring str;        str.reserve(size);  // 保留足够的内存，以避免发生内存搬移        char buf[2];        unsigned int i;        const_iterator  pos;        const_iterator  end = this->end();        for (pos = this->begin(); pos < end; ++pos)        {            if ('%' == *pos)            {                if ((pos + 2 < end) &&                   (('0' <= *(pos + 1) && *(pos + 1) <= '9') ||                    ('A' <= *(pos + 1) && *(pos + 1) <= 'Z') ||                    ('a' <= *(pos + 1) && *(pos + 1) <= 'z')) &&                   (('0' <= *(pos + 2) && *(pos + 2) <= '9') ||                    ('A' <= *(pos + 2) && *(pos + 2) <= 'Z') ||                    ('a' <= *(pos + 2) && *(pos + 2) <= 'z')))                {                    for (i = 0; i < 2; ++i)                    {                        buf[i] = *(++pos);                        if ('0' <= buf[i] && buf[i] <= '9')                        {                            buf[i] = buf[i] - '0';                        }                        else if ('A' <= buf[i] && buf[i] <= 'Z')                        {                            buf[i] = buf[i] - 'A' + 10;                        }                        else // ('a' <= buf[i] && buf[i] <= 'z')                        {                            buf[i] = buf[i] - 'a' + 10;                        }                    }                    str += ((buf[0] << 4) & 0xF0) | (buf[1] & 0x0F);                    continue;                }                str += *pos;            }            else if ('+' == *pos)            {                str += ' ';            }            else            {                str += *pos;            }        }        return str;    }   // xstring::urldecode()//----------------------------------------------------------------------------    /**    @return 一个新的字符串，其中所有的非字母数字字符，除了-_.之外，都会被替换    成一个百分号（%）跟着两个十六进制数字。    这是RFC 1738上面描述的编码方式，用来保护文字内容不被特殊的URL分隔字符打断。    这也用来保护URL不被传输媒体上字符转换损坏。比如，你想要在一个FTP URL中包含    密码：    @code  * cout << "<a href=\"ftp://user:" << xstring("foo @+%/").rawurlencode()  *      << "@ftp.my.com/x.txt\">";    @endcode    或者，你通过URL中的PATH_INFO传送信息：    @code  * cout << "<a href=\"http://x.com/department_list_script/"  *      << xstring("sales and marketing/Miami").rawurlencode() << "\">";    @endcode    @see urlencode() urldecode() rawurldecode()     */    xstring xstring::rawurlencode(void) const    {        static char hex_data[] = "0123456789ABCDEF";        size_type size = this->size();        xstring str;        str.reserve(size * 3);  // 保留足够的内存，以避免发生内存搬移        const_iterator  pos;        const_iterator  end = this->end();        for (pos = this->begin(); pos < end; ++pos)        {            if (('0' <= *pos && *pos <= '9') ||                ('A' <= *pos && *pos <= 'Z') ||                ('a' <= *pos && *pos <= 'z') ||                ('-' == *pos) || ('_' == *pos) || ('.' == *pos))            {               str += *pos;            }            else            {                str += '%';                str += hex_data[(*pos >> 4) & 0x0F];                str += hex_data[*pos & 0x0F];            }        }        return str;    }   // xstring::rawurlencode()    /**    @return 一个新的字符串，其中原始字符串中的百分号（%）跟着两个十六进制数字    被解码成文字字符。比如，<tt>foo%20bar%40baz</tt>被解码成    <tt>foo bar@@baz</tt>    @attention rawurldecode()不把加号（+）解码成空格。 urldecode()会处理加号。    @see urlencode() urldecode() rawurlencode()    */    xstring xstring::rawurldecode(void) const    {        size_type size = this->size();        xstring str;        str.reserve(size);  // 保留足够的内存，以避免发生内存搬移        char buf[2];        unsigned int i;        const_iterator  pos;        const_iterator  end = this->end();        for (pos = this->begin(); pos < end; ++pos)        {            if ('%' == *pos)            {                if ((pos + 2 < end) &&                   (('0' <= *(pos + 1) && *(pos + 1) <= '9') ||                    ('A' <= *(pos + 1) && *(pos + 1) <= 'Z') ||                    ('a' <= *(pos + 1) && *(pos + 1) <= 'z')) &&                   (('0' <= *(pos + 2) && *(pos + 2) <= '9') ||                    ('A' <= *(pos + 2) && *(pos + 2) <= 'Z') ||                    ('a' <= *(pos + 2) && *(pos + 2) <= 'z')))                {                    for (i = 0; i < 2; ++i)                    {                        buf[i] = *(++pos);                        if ('0' <= buf[i] && buf[i] <= '9')                        {                            buf[i] = buf[i] - '0';                        }                        else if ('A' <= buf[i] && buf[i] <= 'Z')                        {                            buf[i] = buf[i] - 'A' + 10;                        }                        else // ('a' <= buf[i] && buf[i] <= 'z')                        {                            buf[i] = buf[i] - 'a' + 10;                        }                    }                    str += ((buf[0] << 4) & 0xF0) | (buf[1] & 0x0F);                    continue;                }                str += *pos;            }            else            {                str += *pos;            }        }        return str;    }   // xstring::rawurldecode()//----------------------------------------------------------------------------    /**    base64_encode()返回base64编码完成的数据。这种编码方式是设计用来使二进制数    据安全通过只接受7-bit字节的传输层，比如说电子邮件的信件正文。    Base64编码过的数据比原始数据增加大约33%的数据量。    @code  * xstring str = "This is an encoded string";  * cout << str.base64_encode();    @endcode    这将会输出    @code  * VGhpcyBpcyBhbiBlbmNvZGVkIHN0cmluZw==    @endcode    @see base64_decode() chunk_split()    <a href="http://www.faqs.org/rfcs/rfc2045">RFC 2045</a> section 6.8    */    xstring xstring::base64_encode(void) const    {        static const unsigned char dtable[64] =        {            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,            0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,            0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,            0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,            0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E,            0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,            0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33,            0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F        };        int i, hiteof = 0;        size_type offset = 0;        int olen;        olen = 0;        xstring str;        const char* source = this->data();        size_type length = this->size();        str.reserve((length / 3 + 1) * 4);  // 保留足够的内存        while (!hiteof) {            unsigned char igroup[3], ogroup[4];            int c, n;            igroup[0] = igroup[1] = igroup[2] = 0;            for (n = 0; n < 3; n++) {                c = *(source++);                offset++;                if (offset > length) {                    hiteof = 1;                    break;                }                igroup[n] = (unsigned char)c;            }            if (n > 0) {                ogroup[0] = dtable[igroup[0] >> 2];                ogroup[1] = dtable[((igroup[0] & 3) << 4) | (igroup[1] >> 4)];                ogroup[2] = dtable[((igroup[1] & 0xF) << 2) | (igroup[2] >> 6)];                ogroup[3] = dtable[igroup[2] & 0x3F];                /* Replace characters in output stream with "=" pad                characters if fewer than three characters were                read from the end of the input stream. */                if (n < 3) {                    ogroup[3] = '=';                    if (n < 2) {                        ogroup[2] = '=';                    }                }                for (i = 0; i < 4; i++) {                    str += ogroup[i];                }            }        }        return str;    }   // xstring::base64_encode()    /**    base64_decode()解码编码过的数据并返回原始数据。返回的数据有可能是二进制的。    @code  * xstring str = "VGhpcyBpcyBhbiBlbmNvZGVkIHN0cmluZw==";  * cout << str.base64_decode();    @endcode    这将会输出    @code  * This is an encoded string    @endcode    @see base64_encode()    <a href="http://www.faqs.org/rfcs/rfc2045">RFC 2045</a> section 6.8     */    xstring xstring::base64_decode(void) const    {        static const unsigned dtable[128] =        {            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3F,            0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,            0x3C, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,            0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,            0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,            0x17, 0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,            0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,            0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,            0x31, 0x32, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00        };        int i;        size_type offset = 0;        int endoffile;        int count;        endoffile = 0;        xstring str;        const char* source = this->data();        size_type length = this->size();        str.reserve(length * 3 / 4);        // 保留足够的内存        /*CONSTANTCONDITION*/        while (1) {            unsigned char a[4], b[4], o[3];            for (i = 0; i < 4; i++) {                int c;                while (1) {                    c = *(source++);                    offset++;                    if (offset > length) endoffile = 1;                    if (isspace(c) || c == '\n' || c == '\r') continue;                    break;                }                if (endoffile) {                    return str;                }                if (dtable[c] & 0x80) {                    i--;                    continue;                }                a[i] = (unsigned char)c;                b[i] = (unsigned char)dtable[c];            }            o[0] = (b[0] << 2) | (b[1] >> 4);            o[1] = (b[1] << 4) | (b[2] >> 2);            o[2] = (b[2] << 6) | b[3];            i = a[2] == '=' ? 1 : (a[3] == '=' ? 2 : 3);            count = 0;            while (count < i) {                str += o[count++];            }            if (i < 3) {                return str;            }        }        return str;    }   // xstring::base64_decode()//----------------------------------------------------------------------------    /**    这个函数用来把一个字符串分割成小块。比如说按照RFC 2045的方式转换    base64_encode的输出。end（默认值是"@\r@\n"）被插入到每chunklen个字符后面。    @return 新字符串的拷贝，不改变原始字符串。    @code  * xstring data;  * // ...  * // format data using RFC 2045 semantics  * xstring new_string = data.base64_encode().chunk_split();    @endcode    @see explode() split()    <a href="http://www.faqs.org/rfcs/rfc2045">RFC 2045</a> section 6.8     */    xstring xstring::chunk_split(        unsigned int    chunklen,        const xstring&  end    ) const    {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -