📄 string.cpp
字号:
//%2006//////////////////////////////////////////////////////////////////////////// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;// IBM Corp.; EMC Corporation, The Open Group.// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;// EMC Corporation; VERITAS Software Corporation; The Open Group.// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;// EMC Corporation; Symantec Corporation; The Open Group.//// Permission is hereby granted, free of charge, to any person obtaining a copy// of this software and associated documentation files (the "Software"), to// deal in the Software without restriction, including without limitation the// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or// sell copies of the Software, and to permit persons to whom the Software is// furnished to do so, subject to the following conditions:// // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.////==============================================================================////%/////////////////////////////////////////////////////////////////////////////#include <Pegasus/Common/PegasusAssert.h>#include <cstring>#include "InternalException.h"#include "CommonUTF.h"#include "MessageLoader.h"#include "StringRep.h"#ifdef PEGASUS_HAS_ICU#include <unicode/ustring.h>#include <unicode/uchar.h>#endifPEGASUS_NAMESPACE_BEGIN//==============================================================================//// Compile-time macros (undefined by default).//// PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.////==============================================================================//==============================================================================//// File-scope definitions:////==============================================================================// Note: this table is much faster than the system toupper(). Please do not// change.const Uint8 _toUpperTable[256] ={ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,};// Note: this table is much faster than the system tulower(). Please do not// change.const Uint8 _toLowerTable[256] ={ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,};// Converts 16-bit characters to upper case. This routine is faster than the// system toupper(). Please do not change.inline Uint16 _toUpper(Uint16 x){ return (x & 0xFF00) ? x : _toUpperTable[x];}// Converts 16-bit characters to lower case. This routine is faster than the// system toupper(). Please do not change.inline Uint16 _toLower(Uint16 x){ return (x & 0xFF00) ? x : _toLowerTable[x];}// Rounds x up to the nearest power of two (or just returns 8 if x < 8).static Uint32 _roundUpToPow2(Uint32 x){ // Check for potential overflow in x PEGASUS_CHECK_CAPACITY_OVERFLOW(x); if (x < 8) return 8; x--; x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); x++; return x;}template<class P, class Q>static void _copy(P* p, const Q* q, size_t n){ // The following employs loop unrolling for efficiency. Please do not // eliminate. while (n >= 8) { p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; p[3] = q[3]; p[4] = q[4]; p[5] = q[5]; p[6] = q[6]; p[7] = q[7]; p += 8; q += 8; n -= 8; } while (n >= 4) { p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; p[3] = q[3]; p += 4; q += 4; n -= 4; } while (n--) *p++ = *q++;}static Uint16* _find(const Uint16* s, size_t n, Uint16 c){ // The following employs loop unrolling for efficiency. Please do not // eliminate. while (n >= 4) { if (s[0] == c) return (Uint16*)s; if (s[1] == c) return (Uint16*)&s[1]; if (s[2] == c) return (Uint16*)&s[2]; if (s[3] == c) return (Uint16*)&s[3]; n -= 4; s += 4; } if (n) { if (*s == c) return (Uint16*)s; s++; n--; } if (n) { if (*s == c) return (Uint16*)s; s++; n--; } if (n && *s == c) return (Uint16*)s; // Not found! return 0;}static int _compare(const Uint16* s1, const Uint16* s2){ while (*s1 && *s2) { int r = *s1++ - *s2++; if (r) return r; } if (*s2) return -1; else if (*s1) return 1; return 0;}static int _compareNoUTF8(const Uint16* s1, const char* s2){ Uint16 c1; Uint16 c2; do { c1 = *s1++; c2 = *s2++; if (c1 == 0) return c1 - c2; } while (c1 == c2); return c1 - c2;}static inline void _copy(Uint16* s1, const Uint16* s2, size_t n){ memcpy(s1, s2, n * sizeof(Uint16));}void StringThrowOutOfBounds(){ throw IndexOutOfBoundsException();}inline void _checkNullPointer(const void* ptr){ if (!ptr) throw NullPointer();}static void _StringThrowBadUTF8(Uint32 index){ MessageLoaderParms parms( "Common.String.BAD_UTF8", "The byte sequence starting at index $0 " "is not valid UTF-8 encoding.", index); throw Exception(parms);}static size_t _copyFromUTF8( Uint16* dest, const char* src, size_t n, size_t& utf8_error_index){ Uint16* p = dest; const Uint8* q = (const Uint8*)src; // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). // Use loop-unrolling. while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0) { p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; p[3] = q[3]; p[4] = q[4]; p[5] = q[5]; p[6] = q[6]; p[7] = q[7]; p += 8; q += 8; n -= 8; } while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0) { p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; p[3] = q[3]; p += 4; q += 4; n -= 4; } switch (n) { case 0: return p - dest; case 1: if (q[0] < 128) { p[0] = q[0]; return p + 1 - dest; } break; case 2: if (((q[0]|q[1]) & 0x80) == 0) { p[0] = q[0]; p[1] = q[1]; return p + 2 - dest; } break; case 3: if (((q[0]|q[1]|q[2]) & 0x80) == 0) { p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; return p + 3 - dest; } break; } // Process remaining characters. while (n) { // Optimize for 7-bit ASCII case. if (*q < 128) { *p++ = *q++; n--; } else { Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1; if (c > n || !isValid_U8(q, c) || UTF8toUTF16(&q, q + c, &p, p + n) != 0) { utf8_error_index = q - (const Uint8*)src; return size_t(-1); } n -= c; } } return p - dest;}// Note: dest must be at least three times src (plus an extra byte for// terminator).static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n){ // The following employs loop unrolling for efficiency. Please do not // eliminate. const Uint16* q = src; Uint8* p = (Uint8*)dest; while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) { p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; p[3] = q[3]; p += 4; q += 4; n -= 4; } switch (n) { case 0: return p - (Uint8*)dest; case 1: if (q[0] < 128) { p[0] = q[0]; return p + 1 - (Uint8*)dest; } break; case 2: if (q[0] < 128 && q[1] < 128) { p[0] = q[0]; p[1] = q[1]; return p + 2 - (Uint8*)dest; } break; case 3: if (q[0] < 128 && q[1] < 128 && q[2] < 128) { p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; return p + 3 - (Uint8*)dest; } break; } // If this line was reached, there must be characters greater than 128. UTF16toUTF8(&q, q + n, &p, p + 3 * n); return p - (Uint8*)dest;}static inline size_t _convert( Uint16* p, const char* q, size_t n, size_t& utf8_error_index){#ifdef PEGASUS_STRING_NO_UTF8 _copy(p, q, n); return n;#else return _copyFromUTF8(p, q, n, utf8_error_index);#endif}//==============================================================================//// class CString////==============================================================================CString::CString(const CString& cstr) : _rep(0){ if (cstr._rep) { size_t n = strlen(cstr._rep) + 1; _rep = (char*)operator new(n); memcpy(_rep, cstr._rep, n); }}CString& CString::operator=(const CString& cstr){ if (&cstr != this) { if (_rep) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -