📄 string.cpp
字号:
if (!(*p & 0xFF00)) *p = _toLower(*p); }}void String::toUpper(){#ifdef PEGASUS_HAS_ICU if (InitializeICU::initICUSuccessful()) { if (_rep->refs.get() != 1) _rep = StringRep::copyOnWrite(_rep); // This will do a locale-insensitive, but context-sensitive convert. // Since context-sensitive casing looks at adjacent chars, this // prevents optimizations where the us-ascii is converted before // calling ICU. // The string may shrink or expand after the convert. //// First calculate size of resulting string. u_strToUpper() returns //// only the size when zero is passed as the destination size argument. UErrorCode err = U_ZERO_ERROR; int32_t newSize = u_strToUpper( NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); err = U_ZERO_ERROR; //// Reserve enough space for the result. if ((Uint32)newSize > _rep->cap) _reserve(_rep, newSize); //// Perform the conversion (overlapping buffers are allowed). u_strToUpper((UChar*)_rep->data, newSize, (UChar*)_rep->data, _rep->size, NULL, &err); _rep->size = newSize; return; }#endif /* PEGASUS_HAS_ICU */ if (_rep->refs.get() != 1) _rep = StringRep::copyOnWrite(_rep); Uint16* p = _rep->data; size_t n = _rep->size; for (; n--; p++) *p = _toUpper(*p);}int String::compare(const String& s1, const String& s2, Uint32 n){ const Uint16* p1 = s1._rep->data; const Uint16* p2 = s2._rep->data; while (n--) { int r = *p1++ - *p2++; if (r) { return r; } else if (!p1[-1]) { // We must have encountered a null terminator in both s1 and s2 return 0; } } return 0;}int String::compare(const String& s1, const String& s2){ return _compare(s1._rep->data, s2._rep->data);}int String::compare(const String& s1, const char* s2){ _checkNullPointer(s2);#ifdef PEGASUS_STRING_NO_UTF8 return _compareNoUTF8(s1._rep->data, s2);#else // ATTN: optimize this! return String::compare(s1, String(s2));#endif}int String::compareNoCase(const String& str1, const String& str2){#ifdef PEGASUS_HAS_ICU if (InitializeICU::initICUSuccessful()) { return u_strcasecmp( (const UChar*)str1._rep->data, (const UChar*)str2._rep->data, U_FOLD_CASE_DEFAULT ); }#endif /* PEGASUS_HAS_ICU */ const Uint16* s1 = str1._rep->data; const Uint16* s2 = str2._rep->data; while (*s1 && *s2) { int r = _toLower(*s1++) - _toLower(*s2++); if (r) return r; } if (*s2) return -1; else if (*s1) return 1; return 0;}Boolean StringEqualNoCase(const String& s1, const String& s2){#ifdef PEGASUS_HAS_ICU return String::compareNoCase(s1, s2) == 0;#else /* PEGASUS_HAS_ICU */ // The following employs loop unrolling for efficiency. Please do not // eliminate. Uint16* p = (Uint16*)s1.getChar16Data(); Uint16* q = (Uint16*)s2.getChar16Data(); Uint32 n = s2.size(); while (n >= 8) { if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) || ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) || ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) || ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) || ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) || ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) || ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) || ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) { return false; } n -= 8; p += 8; q += 8; } while (n >= 4) { if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) || ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) || ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) || ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3])))) { return false; } n -= 4; p += 4; q += 4; } while (n--) { if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0])))) return false; p++; q++; } return true;#endif /* PEGASUS_HAS_ICU */}Boolean String::equalNoCase(const String& s1, const char* s2){ _checkNullPointer(s2);#if defined(PEGASUS_HAS_ICU) return String::equalNoCase(s1, String(s2));#elif defined(PEGASUS_STRING_NO_UTF8) const Uint16* p1 = (Uint16*)s1._rep->data; const char* p2 = s2; size_t n = s1._rep->size; while (n--) { if (!*p2) return false; if (_toUpper(*p1++) != _toUpperTable[int(*p2++)]) return false; } if (*p2) return false; return true;#else /* PEGASUS_HAS_ICU */ // ATTN: optimize this! return String::equalNoCase(s1, String(s2));#endif /* PEGASUS_HAS_ICU */}Boolean String::equal(const String& s1, const String& s2){ return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;}Boolean String::equal(const String& s1, const char* s2){#ifdef PEGASUS_STRING_NO_UTF8 _checkNullPointer(s2); const Uint16* p = (Uint16*)s1._rep->data; const char* q = s2; while (*p && *q) { if (*p++ != Uint16(*q++)) return false; } return !(*p || *q);#else /* PEGASUS_STRING_NO_UTF8 */ return String::equal(s1, String(s2));#endif /* PEGASUS_STRING_NO_UTF8 */}PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str){#if defined(PEGASUS_OS_OS400) CString cstr = str.getCString(); const char* utf8str = cstr; os << utf8str; return os;#else#if defined(PEGASUS_HAS_ICU) if (InitializeICU::initICUSuccessful()) { char *buf = NULL; const int size = str.size() * 6; UnicodeString UniStr( (const UChar *)str.getChar16Data(), (int32_t)str.size()); Uint32 bufsize = UniStr.extract(0,size,buf); buf = new char[bufsize+1]; UniStr.extract(0,bufsize,buf); os << buf; os.flush(); delete [] buf; return os; }#endif // PEGASUS_HAS_ICU for (Uint32 i = 0, n = str.size(); i < n; i++) { Uint16 code = str[i]; if (code > 0 && !(code & 0xFF00)) os << char(code); else { // Print in hex format: char buffer[8]; sprintf(buffer, "\\x%04X", code); os << buffer; } } return os;#endif // PEGASUS_OS_OS400}void StringAppendCharAux(StringRep*& _rep){ StringRep* tmp; if (_rep->cap) { tmp = StringRep::alloc(2 * _rep->cap); tmp->size = _rep->size; _copy(tmp->data, _rep->data, _rep->size); } else { tmp = StringRep::alloc(8); tmp->size = 0; } StringRep::unref(_rep); _rep = tmp;}PEGASUS_NAMESPACE_END/*================================================================================String optimizations: 1. Added mechanism allowing certain functions to be inlined only when used by internal Pegasus modules. External modules (i.e., providers) link to a non-inline version, which allows for binary compatibility. 2. Implemented copy-on-write with atomic increment/decrement. This yieled a 10% improvement for the 'gc' benchmark and a 11% improvment for the 'ni1000' benchmark. 3. Employed loop unrolling in several places. For example, see: static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 4. Used the "empty-rep" optimization (described in whitepaper from the GCC Developers Summit). This reduced default construction to a simple pointer assignment. inline String::String() : _rep(&_emptyRep) { } 5. Implemented Uint16 versions of toupper() and tolower() using tables. For example: static const char _upper[] = { 0,1,2,...255 }; inline Uint16 _toUpper(Uint16 x) { return (x & 0xFF00) ? x : _upper[x]; } This outperforms the system implementation by avoiding an anding operation. 6. Implemented char* version of the following member functions to eliminate unecessary creation of anonymous string objects (temporaries). String(const String& s1, const char* s2); String(const char* s1, const String& s2); String& String::operator=(const char* str); Uint32 String::find(const char* s) const; bool String::equal(const String& s1, const char* s2); static int String::compare(const String& s1, const char* s2); String& String::append(const char* str); String& String::append(const char* str, Uint32 size); static bool String::equalNoCase(const String& s1, const char* s2); String& operator=(const char* str) String& String::assign(const char* str) String& String::append(const char* str) Boolean operator==(const String& s1, const char* s2) Boolean operator==(const char* s1, const String& s2) Boolean operator!=(const String& s1, const char* s2) Boolean operator!=(const char* s1, const String& s2) Boolean operator<(const String& s1, const char* s2) Boolean operator<(const char* s1, const String& s2) Boolean operator>(const String& s1, const char* s2) Boolean operator>(const char* s1, const String& s2) Boolean operator<=(const String& s1, const char* s2) Boolean operator<=(const char* s1, const String& s2) Boolean operator>=(const String& s1, const char* s2) Boolean operator>=(const char* s1, const String& s2) String operator+(const String& s1, const char* s2) String operator+(const char* s1, const String& s2) 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next power of two (algorithm from the book "Hacker's Delight"). static Uint32 _roundUpToPow2(Uint32 x) { if (x < 8) return 8; x--; x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); x++; return x; } 8. Implemented "concatenating constructors" to eliminate temporaries created by operator+(). This scheme employs the "return-value optimization" described by Stan Lippman. inline String operator+(const String& s1, const String& s2) { return String(s1, s2, 0); } 9. Experimented to find the optimial initial size for a short string. Eight seems to offer the best tradeoff between space and time. 10. Inlined all members of the Char16 class. 11. Used Uint16 internally in the String class. This showed no improvememnt since Char16 was already fully inlined and was essentially reduced to Uint16 in any case. 12. Implemented conditional logic (#if) allowing error checking logic to be excluded to better performance. Examples include bounds checking and null-pointer checking. 13. Used memcpy() and memcmp() where possible. These are implemented using the rep family of intructions under Intel and are much faster. 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 copy routine overhead. 15. Added ASCII7 form of the constructor and assign(). String s("hello world", String::ASCII7); s.assignASCII7("hello world"); This avoids slower UTF8 processing when not needed.================================================================================TO-DO: (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES (+) [DONE] Submit BUG-2754 (Windows buffer limit). (+) [DONE] Eliminate char versions of find() and append(). (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h (+) [DONE] Change _next_pow_2() to _roundUpToPow2(). (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well). (+) [DONE] Comment StringRep allocation layout. (+) [DONE] Conceal private inline functions. (+) [DONE] Shorten inclusion of StringInline.h in String.h. (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get rid of altogether. (+) [DONE] useCamelNotationOnAllFunctionNames. (+) [DONE] Check for overlow condition in StringRep::alloc(). (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab"). (+) [DONE] Fix throw-related memory leak. (+) [DONE] Look at PEP223 for coding security guidelines. (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250). (+) [DONE] Removed appendASCII() and the ASCII form of the constructor. (+) DOC++ String.h - will open new bug? (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression on certain platforms).================================================================================*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -