首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
ncbistr.cpp

来自「ncbi源码」· C++ 代码 · 共 1,876 行 · 第 1/4 页
CPP
1,876 行
            do {                if ( isprint(*it) ) {                    // escape '"' and '\\' anyway                    if ( *it == '"' || *it == '\\' )                        out.put('\\');                    out.put(*it);                }                else if (*it == '\n') {                    // newline needs special processing                    if (nl_mode == eNewLine_Quote) {                        out.write("\\n", 2);                    }                    else {                        out.put('\n');                    }                } else {                    // all other non-printable characters need to be escaped                    out.put('\\');                    if (*it == '\t') {                        out.put('t');                    } else if (*it == '\r') {                        out.put('r');                    } else if (*it == '\v') {                        out.put('v');                    } else {                        // hex string for non-standard codes                        out.put('x');                        out.put(s_Hex[(unsigned char) *it >> 4]);                        out.put(s_Hex[(unsigned char) *it & 15]);                    }                }            } while (++it < it_end); // it_end is from ITERATE macro            return CNcbiOstrstreamToString(out);        }    }    // all characters are good - return orignal string    return str;}string NStr::ParseEscapes(const string& str){    string out;    out.reserve(str.size()); // can only be smaller    SIZE_TYPE pos = 0;    while (pos < str.size()) {        SIZE_TYPE pos2 = str.find('\\', pos);        if (pos2 == NPOS) {            out += str.substr(pos);            break;        }        out += str.substr(pos, pos2 - pos);        if (++pos2 == str.size()) {            NCBI_THROW2(CStringException, eFormat,                        "Unterminated escape sequence", pos2);        }        switch (str[pos2]) {        case 'a':  out += '\a';  break;        case 'b':  out += '\b';  break;        case 'f':  out += '\f';  break;        case 'n':  out += '\n';  break;        case 'r':  out += '\r';  break;        case 't':  out += '\t';  break;        case 'v':  out += '\v';  break;        case 'x':        {            pos = pos2 + 1;            while (pos2 <= pos  &&  pos2 + 1 < str.size()                   &&  isxdigit(str[pos2 + 1])) {                ++pos2;            }            if (pos2 >= pos) {                out += static_cast<char>                    (StringToUInt(str.substr(pos, pos2 - pos + 1), 16));            } else {                NCBI_THROW2(CStringException, eFormat,                            "\\x used with no following digits", pos);            }            break;        }        case '0':  case '1':  case '2':  case '3':        case '4':  case '5':  case '6':  case '7':        {            pos = pos2;            unsigned char c = str[pos2] - '0';            while (pos2 < pos + 3  &&  pos2 + 1 < str.size()                   &&  str[pos2 + 1] >= '0'  &&  str[pos2 + 1] <= '7') {                c = (c << 3) | (str[++pos2] - '0');            }            out += c;        }        default:            out += str[pos2];        }        pos = pos2 + 1;    }    return out;}// Determines the end of an HTML <...> tag, accounting for attributes// and comments (the latter allowed only within <!...>).static SIZE_TYPE s_EndOfTag(const string& str, SIZE_TYPE start){    _ASSERT(start < str.size()  &&  str[start] == '<');    bool comments_ok = (start + 1 < str.size()  &&  str[start + 1] == '!');    for (SIZE_TYPE pos = start + 1;  pos < str.size();  ++pos) {        switch (str[pos]) {        case '>': // found the end            return pos;        case '\"': // start of "string"; advance to end            pos = str.find('\"', pos + 1);            if (pos == NPOS) {                NCBI_THROW2(CStringException, eFormat,                            "Unclosed string in HTML tag", start);                // return pos;            }            break;        case '-': // possible start of -- comment --; advance to end            if (comments_ok  &&  pos + 1 < str.size()                &&  str[pos + 1] == '-') {                pos = str.find("--", pos + 2);                if (pos == NPOS) {                    NCBI_THROW2(CStringException, eFormat,                                "Unclosed comment in HTML tag", start);                    // return pos;                } else {                    ++pos;                }            }        }    }    NCBI_THROW2(CStringException, eFormat, "Unclosed HTML tag", start);    // return NPOS;}// Determines the end of an HTML &foo; character/entity reference// (which might not actually end with a semicolon :-/)static SIZE_TYPE s_EndOfReference(const string& str, SIZE_TYPE start){    _ASSERT(start < str.size()  &&  str[start] == '&');#ifdef NCBI_STRICT_HTML_REFS    return str.find(';', start + 1);#else    SIZE_TYPE pos = str.find_first_not_of        ("#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",         start + 1);    if (pos == NPOS  ||  str[pos] == ';') {        return pos;    } else {        return pos - 1;    }#endif}static SIZE_TYPE s_VisibleWidth(const string& str, bool is_html){    if (is_html) {        SIZE_TYPE width = 0, pos = 0;        for (;;) {            SIZE_TYPE pos2 = str.find_first_of("<&", pos);            if (pos2 == NPOS) {                width += str.size() - pos;                break;            } else {                width += pos2 - pos;                if (str[pos2] == '&') {                    ++width;                    pos = s_EndOfReference(str, pos);                } else {                    pos = s_EndOfTag(str, pos);                }                if (pos == NPOS) {                    break;                } else {                    ++pos;                }            }        }        return width;    } else {        return str.size();    }}list<string>& NStr::Wrap(const string& str, SIZE_TYPE width,                         list<string>& arr, NStr::TWrapFlags flags,                         const string* prefix, const string* prefix1){    if (prefix == 0) {        prefix = &kEmptyStr;    }    const string* pfx = prefix1 ? prefix1 : prefix;    SIZE_TYPE     pos = 0, len = str.size();    string        hyphen; // "-" or empty    bool          is_html  = flags & fWrap_HTMLPre ? true : false;    enum EScore { // worst to best        eForced,        ePunct,        eSpace,        eNewline    };    while (pos < len) {        SIZE_TYPE column     = s_VisibleWidth(*pfx, is_html);        SIZE_TYPE column0    = column;        // the next line will start at best_pos        SIZE_TYPE best_pos   = NPOS;        EScore    best_score = eForced;        for (SIZE_TYPE pos2 = pos;  pos2 < len && column <= width;             ++pos2, ++column) {            EScore    score     = eForced;            SIZE_TYPE score_pos = pos2;            char      c         = str[pos2];            if (c == '\n') {                best_pos   = pos2;                best_score = eNewline;                break;            } else if (isspace(c)) {                if (pos2 > 0  &&  isspace(str[pos2 - 1])) {                    continue; // take the first space of a group                }                score = eSpace;            } else if (is_html  &&  c == '<') {                // treat tags as zero-width...                pos2 = s_EndOfTag(str, pos2);                --column;            } else if (is_html  &&  c == '&') {                // ...and references as single characters                pos2 = s_EndOfReference(str, pos2);            } else if (ispunct(c)) {                if (c == '('  ||  c == '['  ||  c == '{'  ||  c == '<'                    ||  c == '`') { // opening element                    score = ePunct;                } else if (score_pos < len - 1) {                    // Prefer breaking *after* most types of punctuation.                    score = ePunct;                    ++score_pos;                }            }            if (score >= best_score) {                best_pos   = score_pos;                best_score = score;            }            while (pos2 < len - 1  &&  str[pos2 + 1] == '\b') {                // Account for backspaces                ++pos2;                if (column > column0) {                    --column;                }            }        }        if (best_score != eNewline  &&  column <= width) {            // If the whole remaining text can fit, don't split it...            best_pos = len;        } else if (best_score == eForced  &&  (flags & fWrap_Hyphenate)) {            hyphen = "-";            --best_pos;        }        arr.push_back(*pfx);        {{ // eat backspaces and the characters (if any) that precede them            string    line(str, pos, best_pos - pos);            SIZE_TYPE bs = 0;            while ((bs = line.find('\b', bs)) != NPOS) {                if (bs > 0) {                    line.erase(bs - 1, 2);                } else {                    line.erase(0, 1);                }            }            arr.back() += line;        }}        arr.back() += hyphen;        pos    = best_pos;        pfx    = prefix;        hyphen = kEmptyStr;        if (best_score == eSpace) {            // If breaking at a group of spaces, skip over the whole group            while (pos < len  &&  isspace(str[pos])  &&  str[pos] != '\n') {                ++pos;            }        } else if (best_score == eNewline) {            ++pos;        }        while (pos < len  &&  str[pos] == '\b') {            ++pos;        }    }    return arr;}list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,                             const string& delim, list<string>& arr,                             NStr::TWrapFlags flags, const string* prefix,                             const string* prefix1){    if (l.empty()) {        return arr;    }    const string* pfx      = prefix1 ? prefix1 : prefix;    string        s        = *pfx;    bool          is_html  = flags & fWrap_HTMLPre ? true : false;    SIZE_TYPE     column   = s_VisibleWidth(s,     is_html);    SIZE_TYPE     delwidth = s_VisibleWidth(delim, is_html);    bool          at_start = true;    ITERATE (list<string>, it, l) {        SIZE_TYPE term_width = s_VisibleWidth(*it, is_html);        if (at_start) {            if (column + term_width <= width) {                s += *it;                column += term_width;                at_start = false;            } else {                // Can't fit, even on its own line; break separately.                Wrap(*it, width, arr, flags, prefix, pfx);                pfx      = prefix;                s        = *prefix;                column   = s_VisibleWidth(s, is_html);                at_start = true;            }        } else if (column + delwidth + term_width <= width) {            s += delim;            s += *it;            column += delwidth + term_width;            at_start = false;        } else {            // Can't fit on this line; break here and try again.            arr.push_back(s);            pfx      = prefix;            s        = *prefix;            column   = s_VisibleWidth(s, is_html);            at_start = true;            --it;        }    }    arr.push_back(s);    return arr;}#if !defined(HAVE_STRDUP)extern char* strdup(const char* str){    if ( !str )        return 0;    size_t size   = strlen(str) + 1;    void*  result = malloc(size);    return (char*) (result ? memcpy(result, str, size) : 0);}#endif///////////////////////////////////////////////////////////////////////////////  CStringUTF8void CStringUTF8::x_Append(const char* src){    const char* srcBuf;    size_t needed = 0;    for (srcBuf = src; *srcBuf; ++srcBuf) {        Uint1 ch = *srcBuf;        if (ch < 0x80) {            ++needed;        } else {            needed += 2;        }    }    if ( !needed )        return;    reserve(length()+needed+1);    for (srcBuf = src; *srcBuf; ++srcBuf) {        Uint1 ch = *srcBuf;        if (ch < 0x80) {            append(1, ch);        } else {            append(1, Uint1((ch >> 6) | 0xC0));            append(1, Uint1((ch & 0x3F) | 0x80));        }    }}#if defined(HAVE_WSTRING)void CStringUTF8::x_Append(const wchar_t* src){    const wchar_t* srcBuf;    size_t needed = 0;    for (srcBuf = src; *srcBuf; ++srcBuf) {        Uint2 ch = *srcBuf;        if (ch < 0x80) {            ++needed;        } else if (ch < 0x800) {            needed += 2;        } else {            needed += 3;        }    }    if ( !needed )        return;    reserve(length()+needed+1);    for (srcBuf = src; *srcBuf; ++srcBuf) {        Uint2 ch = *srcBuf;        if (ch < 0x80) {            append(1, ch);        }        else if (ch < 0x800) {            append(1, Uint2((ch >> 6) | 0xC0));            append(1, Uint2((ch & 0x3F) | 0x80));        } else {            append(1, Uint2((ch >> 12) | 0xE0));            append(1, Uint2(((ch >> 6) & 0x3F) | 0x80));            append(1, Uint2((ch & 0x3F) | 0x80));        }    }}#endif // HAVE_WSTRINGstring CStringUTF8::AsAscii(void) const{    string result;    const char* srcBuf;    size_t needed = 0;    bool bad = false;    bool enough = true;    for (srcBuf = c_str(); *srcBuf; ++srcBuf) {        Uint1 ch = *srcBuf;        if ((ch & 0x80) == 0) {            ++needed;        } else if ((ch & 0xE0) == 0xC0) {            enough = (ch & 0x1F) <= 0x03;            if (enough) {                ++needed;                ch = *(++srcBuf);                bad = (ch & 0xC0) != 0x80;            }        } else if ((ch & 0xF0) == 0xE0) {            enough = false;        } else {            bad = true;        }        if (!enough) {            NCBI_THROW2(CStringException, eConvert,                        "Cannot convert UTF8 string to single-byte string",                        s_DiffPtr(srcBuf,c_str()));        }        if (bad) {            NCBI_THROW2(CStringException, eFormat,
ncbistr.cpp - 源码说明

本页面展示了「ncbi源码」中的 ncbistr.cpp 源码文件，采用 C++ 编程语言编写，共 1,876 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?