📄 xstring.cpp

📁 经典的string 函数库学习资料
💻 CPP
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
  * vector<xstring> user = data.explode(":");  * cout << user[0];    // foo  * cout << user[1];    // *  * @endcode    @see split() spliti()     */    std::vector<xstring> xstring::explode(        const xstring&  separator,        unsigned int    limit    ) const    {        std::vector<xstring> pieces;        size_type   separator_size = separator.size();        if (0 == separator_size)    // neo debug 2003-10-20        {            return pieces;        }        size_type   idx_begin = 0;        size_type   idx_end = 0;        unsigned int i = 0;        const_iterator  begin = this->begin();        while ((idx_end = this->find(separator, idx_begin)) != npos)        {            ++i;            if (limit == i)            {                break;            }            pieces.push_back(xstring(begin + idx_begin, begin + idx_end));            idx_begin = idx_end + separator_size;        }        pieces.push_back(xstring(begin + idx_begin, this->end()));        return pieces;    }   // xstring::explode()//----------------------------------------------------------------------------    /**    @return 字符串的拷贝，在原始字符串的下列字符前面增加反斜线（@\）。    @code    . \ + * ? [ ^ ] ( $ )    @endcode    @see addslashes() htmlspecialchars() nl2br() stripslashes()     */    xstring xstring::quotemeta(void) const    {        size_type size = this->size();        xstring str;        str.reserve(size * 2);  // 保留足够的内存，以避免发生内存搬移        const_iterator  pos;        const_iterator  end = this->end();        for (pos = this->begin(); pos < end; ++pos)        {            switch (*pos)            {            case '.' :            case '\\' :            case '+' :            case '*' :            case '?' :            case '[' :            case '^' :            case ']' :            case '(' :            case '$' :            case ')' :                str += '\\';                break;            }            str += *pos;        }        return str;    }   // xstring::quotemeta()    /**    @return 原始字符串在左侧、右侧或者两侧被填补到指定长度的字符串的拷贝。    如果可选的参数pad_string没有指定，将使用空格进行填补。    可选的参数pad_type可以是@link x::PAD_TYPE PAD_RIGHT @endlink、    @link x::PAD_TYPE PAD_LEFT @endlink或者@link x::PAD_TYPE PAD_BOTH @endlink。    如果没有指定pad_type，默认值是@link x::PAD_TYPE PAD_RIGHT @endlink。    @code  * xstring input = "Alien";  * cout << input.str_pad(10);                  // 产生 "Alien     "  * cout << input.str_pad(10, "-=", PAD_LEFT);  // 产生 "-=-=-Alien"  * cout << input.str_pad(10, "_", PAD_BOTH);   // 产生 "__Alien___"  * cout << input.str_pad(6, "___");            // 产生 "Alien_"    @endcode     */    xstring xstring::str_pad(        size_type       pad_length,        const xstring&  pad_string,        PAD_TYPE        pad_type    ) const    {        size_type size = this->size();        if (pad_length <= size)        {            return *this;        }        xstring str;        str.reserve(pad_length);    // 保留足够的内存，以避免发生内存搬移        size_type pad_size = pad_string.size();        size_type i;        switch (pad_type)        {        case PAD_LEFT :            for (i = 0; i < pad_length - size; ++i)            {                str += pad_string[i % pad_size];            }            str += *this;            break;        case PAD_BOTH :            for (i = 0; i < (pad_length - size) / 2; ++i)            {                str += pad_string[i % pad_size];            }            str += *this;            for (i = 0; i < (pad_length - size + 1) / 2; ++i)            {                str += pad_string[i % pad_size];            }            break;        case PAD_RIGHT :            str += *this;            for (i = 0; i < pad_length - size; ++i)            {                str += pad_string[i % pad_size];            }            break;        }        return str;    }   // xstring::str_pad()    /**    @return 原始字符重复multiplier次的字符串。如果multiplier被设置成0，这个函    数将返回空字符串。    @code  * xstring str = "-=";  * cout << str.str_repeat(10);    @endcode    将会输出：    @code  * -=-=-=-=-=-=-=-=-=-=    @endcode    @see str_pad()     */    xstring xstring::str_repeat(unsigned int multiplier) const    {        size_type size = this->size();        xstring str;        str.reserve(size * multiplier);        size_type i;        for (i = 0; i < multiplier; ++i)        {            str += *this;        }        return str;    }//----------------------------------------------------------------------------    /**    @return 去除HTML标签后的字符串拷贝。    @code  * xstring str = "<b>Hello world!</b>";  * cout << str.strip_tags();   // 输出 Hello world!    @endcode     */    xstring xstring::strip_tags(void) const    {        size_type size = this->size();        xstring str;        str.reserve(size);  // 保留足够的内存，以避免发生内存搬移        const_iterator  pos;        const_iterator  end = this->end();        for (pos = this->begin(); pos < end; ++pos)        {            if ('<' == *pos)            {                for (; pos < end; ++pos)                {                    if ('>' == *pos)                    {                        break;                    }                }            }            else            {                str += *pos;            }        }        return str;    }   // xstring::strip_tags()//----------------------------------------------------------------------------    xstring xstring::getWord(const xstring& separator)    {        xstring     piece;        size_type   pos;        if ((pos = this->find(separator)) != npos)        {            piece = xstring(*this, pos + separator.size());            this->erase(pos);        }        this->swap(piece);        return piece;    }    // 是否全是由GB2312字符集中的汉字(包括ASCII字符)构成    bool xstring::isGB2312(void) const    {        const_iterator end = this->end();        const_iterator pos;        for (pos = this->begin(); pos < end; ++pos)        {            if (static_cast<unsigned char>(*pos) >= 0x80)            { // 首位为1                if (pos + 1 >= end) return false;    // neo patch                if ((static_cast<unsigned char>(*pos) >= 0xb0) &&                    (static_cast<unsigned char>(*pos) <= 0xf7))                { // 区号正确(GB2312汉字)                    ++pos;                    if ((static_cast<unsigned char>(*pos) < 0xa1) ||                        (static_cast<unsigned char>(*pos) > 0xfe))                        return false;                } // half if -- GB2312汉字                else if ((static_cast<unsigned char>(*pos) >= 0xa1) &&                         (static_cast<unsigned char>(*pos) <= 0xa9))                { // GB2312非汉字符号                    ++pos;                    if ((static_cast<unsigned char>(*pos) < 0xa1) ||                        (static_cast<unsigned char>(*pos) > 0xfe))                        return false;                    switch (static_cast<unsigned char>(*(pos - 1)))                    { // 区                    case 0xa1: // A1A1是空格，所以也封掉                        if (static_cast<unsigned char>(*pos) == 0xa1)                            return false;                        break;                    case 0xa4:                        if ((static_cast<unsigned char>(*pos) >= 0xf4) &&                            (static_cast<unsigned char>(*pos) <= 0xfe))                            return false;                        break;                    case 0xa5:                        if ((static_cast<unsigned char>(*pos) >= 0xf7) &&                            (static_cast<unsigned char>(*pos) <= 0xfe))                            return false;                        break;                    case 0xa6:                        if (((static_cast<unsigned char>(*pos) >= 0xb9)  &&                             (static_cast<unsigned char>(*pos) <= 0xc0)) ||                            ((static_cast<unsigned char>(*pos) >= 0xd9)  &&                             (static_cast<unsigned char>(*pos) <= 0xfe)))                            return false;                        break;                    case 0xa7:                        if (((static_cast<unsigned char>(*pos) >= 0xc2)  &&                             (static_cast<unsigned char>(*pos) <= 0xd0)) ||                            ((static_cast<unsigned char>(*pos) >= 0xf2)  &&                             (static_cast<unsigned char>(*pos) <= 0xfe)))                            return false;                        break;                    case 0xa8:                        if (((static_cast<unsigned char>(*pos) >= 0xa1)  &&                             (static_cast<unsigned char>(*pos) <= 0xc4)) ||                            ((static_cast<unsigned char>(*pos) >= 0xea)  &&                             (static_cast<unsigned char>(*pos) <= 0xfe)))                            return false;                        break;                    case 0xa9:                        if (((static_cast<unsigned char>(*pos) >= 0xa1)  &&                             (static_cast<unsigned char>(*pos) <= 0xa3)) ||                            ((static_cast<unsigned char>(*pos) >= 0xf0)  &&                             (static_cast<unsigned char>(*pos) <= 0xfe)))                            return false;                        break;                    }                }   // end half if -- GB2312非汉字符号                else                {   // 区号不正确                    return false;                }            }   // if 首位为1            else if (static_cast<unsigned char>(*pos) <= 0x20)   // neo patch            {                return false;            }        }   // while        return true;    }//----------------------------------------------------------------------------    // private static    xstring xstring::regerrmsg(int errcode, regex_t* compiled)    {        size_t length = regerror(errcode, compiled, NULL, 0);        char buffer[length];        regerror(errcode, compiled, buffer, length);        return xstring(buffer);    }    void xstring::regcomp_throw(        regex_t* compiled,        const char* pattern,        int cflags    )    {        int errcode = regcomp(compiled, pattern, cflags);        if (errcode != 0)   // 出错        {            /// @exception std::runtime_error 出错时抛出异常            throw std::runtime_error("regular expression error in xstring: "                + regerrmsg(errcode, compiled));        }    }    int xstring::regexec_throw(        regex_t* compiled,        const char* string,        size_t nmatch,        regmatch_t matchptr[],        int eflags    )    {        int errcode = regexec(compiled, string, nmatch, matchptr, eflags);        switch (errcode)        {            case 0 :            case REG_NOMATCH :  // 不匹配                return errcode;            default :                /// @exception std::runtime_error 出错时抛出异常                throw std::runtime_error("regular expression error in xstring: "                    + regerrmsg(errcode, compiled));        }    }    /**    以区分大小写的方式在字符串中寻找与给定的正则表达式regexp所匹配的子串。    如果找到与regexp中圆括号内的子模式相匹配的子串并且函数调用给出了第二个参数    regs，则匹配项将被存入regs数组中。regs[1]包含第一个左圆括号开始的子串，    regs[2]包含第二个子串，以此类推。regs[0] 包含整个匹配的字符串。    @return 如果在字符串中找到regexp模式的匹配则返回true，如果没有找到匹配则返    回false。    以下代码片断接受ISO格式的日期（YYYY-MM-DD）然后以DD.MM.YYYY格式显示：    @code  * xstring date;  * vector<xstring> regs;  * // ...  * if (date.ereg("([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})", regs))  * {  *     cout << regs[3] << '.' << regs[2] << '.' << regs[1];  * }  * else  * {  *     cout << "Invalid date format: " << date;  * }    @endcode    @exception std::runtime_error 如果正则表达式语法错误，抛出异常    @bug 1.0.2版本以前，返回值错误 （感谢grace <wangzhihui@xilu.com>）    @bug 1.0.3版本以前，正则表达式不匹配时，进入不可预期的状态    （感谢grace <wangzhihui@xilu.com>）    @see eregi() ereg_replace() eregi_replace()     */    bool xstring::ereg(const xstring& regexp, std::vector<xstring>& regs) const    {        regex_t preg;        regcomp_throw(&preg, regexp.c_str(), REG_EXTENDED);        ON_BLOCK_EXIT(regfree, &preg);  // 自动毁尸灭迹        unsigned int subreg_num = preg.re_nsub + 1;        regs.resize(subreg_num);        regmatch_t matchptr[subreg_num];        if (0 != regexec_throw(&preg, this->c_str(), subreg_num, matchptr, 0))        {            return false;        }        const_iterator begin = this->begin();        for (unsigned int i = 0; i < subreg_num; ++i)        {            regs[i] = xstring(begin + matchptr[i].rm_so,                              begin + matchptr[i].rm_eo);        }        return true;    }    /**    这是一个重载函数     */    bool xstring::ereg(const xstring& regexp) const    {        regex_t preg;        regcomp_throw(&preg, regexp.c_str(), REG_EXTENDED | REG_NOSUB);        ON_BLOCK_EXIT(regfree, &preg);  // 自动毁尸灭迹        return regexec_throw(&preg, this->c_str(), 0, NULL, 0) ? false : true;    }    /**    本函数在字符串中扫描与regexp匹配的部分，并将其替换为replacement。    @return 替换后的字符串。（如果没有可供替换的匹配项则会返回原字符串。）    如果regexp包含有括号内的子串，则replacement可以包含形如@\digit的子串，这些    子串将被替换为数字表示的的第几个括号内的子串；@\0则
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -