📄 ncbistr.cpp
字号:
do { if ( isprint(*it) ) { // escape '"' and '\\' anyway if ( *it == '"' || *it == '\\' ) out.put('\\'); out.put(*it); } else if (*it == '\n') { // newline needs special processing if (nl_mode == eNewLine_Quote) { out.write("\\n", 2); } else { out.put('\n'); } } else { // all other non-printable characters need to be escaped out.put('\\'); if (*it == '\t') { out.put('t'); } else if (*it == '\r') { out.put('r'); } else if (*it == '\v') { out.put('v'); } else { // hex string for non-standard codes out.put('x'); out.put(s_Hex[(unsigned char) *it >> 4]); out.put(s_Hex[(unsigned char) *it & 15]); } } } while (++it < it_end); // it_end is from ITERATE macro return CNcbiOstrstreamToString(out); } } // all characters are good - return orignal string return str;}string NStr::ParseEscapes(const string& str){ string out; out.reserve(str.size()); // can only be smaller SIZE_TYPE pos = 0; while (pos < str.size()) { SIZE_TYPE pos2 = str.find('\\', pos); if (pos2 == NPOS) { out += str.substr(pos); break; } out += str.substr(pos, pos2 - pos); if (++pos2 == str.size()) { NCBI_THROW2(CStringException, eFormat, "Unterminated escape sequence", pos2); } switch (str[pos2]) { case 'a': out += '\a'; break; case 'b': out += '\b'; break; case 'f': out += '\f'; break; case 'n': out += '\n'; break; case 'r': out += '\r'; break; case 't': out += '\t'; break; case 'v': out += '\v'; break; case 'x': { pos = pos2 + 1; while (pos2 <= pos && pos2 + 1 < str.size() && isxdigit(str[pos2 + 1])) { ++pos2; } if (pos2 >= pos) { out += static_cast<char> (StringToUInt(str.substr(pos, pos2 - pos + 1), 16)); } else { NCBI_THROW2(CStringException, eFormat, "\\x used with no following digits", pos); } break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { pos = pos2; unsigned char c = str[pos2] - '0'; while (pos2 < pos + 3 && pos2 + 1 < str.size() && str[pos2 + 1] >= '0' && str[pos2 + 1] <= '7') { c = (c << 3) | (str[++pos2] - '0'); } out += c; } default: out += str[pos2]; } pos = pos2 + 1; } return out;}// Determines the end of an HTML <...> tag, accounting for attributes// and comments (the latter allowed only within <!...>).static SIZE_TYPE s_EndOfTag(const string& str, SIZE_TYPE start){ _ASSERT(start < str.size() && str[start] == '<'); bool comments_ok = (start + 1 < str.size() && str[start + 1] == '!'); for (SIZE_TYPE pos = start + 1; pos < str.size(); ++pos) { switch (str[pos]) { case '>': // found the end return pos; case '\"': // start of "string"; advance to end pos = str.find('\"', pos + 1); if (pos == NPOS) { NCBI_THROW2(CStringException, eFormat, "Unclosed string in HTML tag", start); // return pos; } break; case '-': // possible start of -- comment --; advance to end if (comments_ok && pos + 1 < str.size() && str[pos + 1] == '-') { pos = str.find("--", pos + 2); if (pos == NPOS) { NCBI_THROW2(CStringException, eFormat, "Unclosed comment in HTML tag", start); // return pos; } else { ++pos; } } } } NCBI_THROW2(CStringException, eFormat, "Unclosed HTML tag", start); // return NPOS;}// Determines the end of an HTML &foo; character/entity reference// (which might not actually end with a semicolon :-/)static SIZE_TYPE s_EndOfReference(const string& str, SIZE_TYPE start){ _ASSERT(start < str.size() && str[start] == '&');#ifdef NCBI_STRICT_HTML_REFS return str.find(';', start + 1);#else SIZE_TYPE pos = str.find_first_not_of ("#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", start + 1); if (pos == NPOS || str[pos] == ';') { return pos; } else { return pos - 1; }#endif}static SIZE_TYPE s_VisibleWidth(const string& str, bool is_html){ if (is_html) { SIZE_TYPE width = 0, pos = 0; for (;;) { SIZE_TYPE pos2 = str.find_first_of("<&", pos); if (pos2 == NPOS) { width += str.size() - pos; break; } else { width += pos2 - pos; if (str[pos2] == '&') { ++width; pos = s_EndOfReference(str, pos); } else { pos = s_EndOfTag(str, pos); } if (pos == NPOS) { break; } else { ++pos; } } } return width; } else { return str.size(); }}list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr, NStr::TWrapFlags flags, const string* prefix, const string* prefix1){ if (prefix == 0) { prefix = &kEmptyStr; } const string* pfx = prefix1 ? prefix1 : prefix; SIZE_TYPE pos = 0, len = str.size(); string hyphen; // "-" or empty bool is_html = flags & fWrap_HTMLPre ? true : false; enum EScore { // worst to best eForced, ePunct, eSpace, eNewline }; while (pos < len) { SIZE_TYPE column = s_VisibleWidth(*pfx, is_html); SIZE_TYPE column0 = column; // the next line will start at best_pos SIZE_TYPE best_pos = NPOS; EScore best_score = eForced; for (SIZE_TYPE pos2 = pos; pos2 < len && column <= width; ++pos2, ++column) { EScore score = eForced; SIZE_TYPE score_pos = pos2; char c = str[pos2]; if (c == '\n') { best_pos = pos2; best_score = eNewline; break; } else if (isspace(c)) { if (pos2 > 0 && isspace(str[pos2 - 1])) { continue; // take the first space of a group } score = eSpace; } else if (is_html && c == '<') { // treat tags as zero-width... pos2 = s_EndOfTag(str, pos2); --column; } else if (is_html && c == '&') { // ...and references as single characters pos2 = s_EndOfReference(str, pos2); } else if (ispunct(c)) { if (c == '(' || c == '[' || c == '{' || c == '<' || c == '`') { // opening element score = ePunct; } else if (score_pos < len - 1) { // Prefer breaking *after* most types of punctuation. score = ePunct; ++score_pos; } } if (score >= best_score) { best_pos = score_pos; best_score = score; } while (pos2 < len - 1 && str[pos2 + 1] == '\b') { // Account for backspaces ++pos2; if (column > column0) { --column; } } } if (best_score != eNewline && column <= width) { // If the whole remaining text can fit, don't split it... best_pos = len; } else if (best_score == eForced && (flags & fWrap_Hyphenate)) { hyphen = "-"; --best_pos; } arr.push_back(*pfx); {{ // eat backspaces and the characters (if any) that precede them string line(str, pos, best_pos - pos); SIZE_TYPE bs = 0; while ((bs = line.find('\b', bs)) != NPOS) { if (bs > 0) { line.erase(bs - 1, 2); } else { line.erase(0, 1); } } arr.back() += line; }} arr.back() += hyphen; pos = best_pos; pfx = prefix; hyphen = kEmptyStr; if (best_score == eSpace) { // If breaking at a group of spaces, skip over the whole group while (pos < len && isspace(str[pos]) && str[pos] != '\n') { ++pos; } } else if (best_score == eNewline) { ++pos; } while (pos < len && str[pos] == '\b') { ++pos; } } return arr;}list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width, const string& delim, list<string>& arr, NStr::TWrapFlags flags, const string* prefix, const string* prefix1){ if (l.empty()) { return arr; } const string* pfx = prefix1 ? prefix1 : prefix; string s = *pfx; bool is_html = flags & fWrap_HTMLPre ? true : false; SIZE_TYPE column = s_VisibleWidth(s, is_html); SIZE_TYPE delwidth = s_VisibleWidth(delim, is_html); bool at_start = true; ITERATE (list<string>, it, l) { SIZE_TYPE term_width = s_VisibleWidth(*it, is_html); if (at_start) { if (column + term_width <= width) { s += *it; column += term_width; at_start = false; } else { // Can't fit, even on its own line; break separately. Wrap(*it, width, arr, flags, prefix, pfx); pfx = prefix; s = *prefix; column = s_VisibleWidth(s, is_html); at_start = true; } } else if (column + delwidth + term_width <= width) { s += delim; s += *it; column += delwidth + term_width; at_start = false; } else { // Can't fit on this line; break here and try again. arr.push_back(s); pfx = prefix; s = *prefix; column = s_VisibleWidth(s, is_html); at_start = true; --it; } } arr.push_back(s); return arr;}#if !defined(HAVE_STRDUP)extern char* strdup(const char* str){ if ( !str ) return 0; size_t size = strlen(str) + 1; void* result = malloc(size); return (char*) (result ? memcpy(result, str, size) : 0);}#endif/////////////////////////////////////////////////////////////////////////////// CStringUTF8void CStringUTF8::x_Append(const char* src){ const char* srcBuf; size_t needed = 0; for (srcBuf = src; *srcBuf; ++srcBuf) { Uint1 ch = *srcBuf; if (ch < 0x80) { ++needed; } else { needed += 2; } } if ( !needed ) return; reserve(length()+needed+1); for (srcBuf = src; *srcBuf; ++srcBuf) { Uint1 ch = *srcBuf; if (ch < 0x80) { append(1, ch); } else { append(1, Uint1((ch >> 6) | 0xC0)); append(1, Uint1((ch & 0x3F) | 0x80)); } }}#if defined(HAVE_WSTRING)void CStringUTF8::x_Append(const wchar_t* src){ const wchar_t* srcBuf; size_t needed = 0; for (srcBuf = src; *srcBuf; ++srcBuf) { Uint2 ch = *srcBuf; if (ch < 0x80) { ++needed; } else if (ch < 0x800) { needed += 2; } else { needed += 3; } } if ( !needed ) return; reserve(length()+needed+1); for (srcBuf = src; *srcBuf; ++srcBuf) { Uint2 ch = *srcBuf; if (ch < 0x80) { append(1, ch); } else if (ch < 0x800) { append(1, Uint2((ch >> 6) | 0xC0)); append(1, Uint2((ch & 0x3F) | 0x80)); } else { append(1, Uint2((ch >> 12) | 0xE0)); append(1, Uint2(((ch >> 6) & 0x3F) | 0x80)); append(1, Uint2((ch & 0x3F) | 0x80)); } }}#endif // HAVE_WSTRINGstring CStringUTF8::AsAscii(void) const{ string result; const char* srcBuf; size_t needed = 0; bool bad = false; bool enough = true; for (srcBuf = c_str(); *srcBuf; ++srcBuf) { Uint1 ch = *srcBuf; if ((ch & 0x80) == 0) { ++needed; } else if ((ch & 0xE0) == 0xC0) { enough = (ch & 0x1F) <= 0x03; if (enough) { ++needed; ch = *(++srcBuf); bad = (ch & 0xC0) != 0x80; } } else if ((ch & 0xF0) == 0xE0) { enough = false; } else { bad = true; } if (!enough) { NCBI_THROW2(CStringException, eConvert, "Cannot convert UTF8 string to single-byte string", s_DiffPtr(srcBuf,c_str())); } if (bad) { NCBI_THROW2(CStringException, eFormat,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -