📄 ustring.cpp
字号:
unsigned i = toStrictUInt32(ok); if (i >= 0xFFFFFFFFU && ok) *ok = false; return i;}int UString::find(const UString &f, int pos) const{ int sz = size(); int fsz = f.size(); if (sz < fsz) return -1; if (pos < 0) pos = 0; if (fsz == 0) return pos; const UChar *end = data() + sz - fsz; long fsizeminusone = (fsz - 1) * sizeof(UChar); const UChar *fdata = f.data(); for (const UChar *c = data() + pos; c <= end; c++) if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) return (c-data()); return -1;}int UString::find(UChar ch, int pos) const{ if (pos < 0) pos = 0; const UChar *end = data() + size(); for (const UChar *c = data() + pos; c < end; c++) if (*c == ch) return (c-data()); return -1;}int UString::rfind(const UString &f, int pos) const{ int sz = size(); int fsz = f.size(); if (sz < fsz) return -1; if (pos < 0) pos = 0; if (pos > sz - fsz) pos = sz - fsz; if (fsz == 0) return pos; long fsizeminusone = (fsz - 1) * sizeof(UChar); const UChar *fdata = f.data(); for (const UChar *c = data() + pos; c >= data(); c--) { if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) return (c-data()); } return -1;}int UString::rfind(UChar ch, int pos) const{ if (isEmpty()) return -1; if (pos + 1 >= size()) pos = size() - 1; for (const UChar *c = data() + pos; c >= data(); c--) { if (*c == ch) return (c-data()); } return -1;}UString UString::substr(int pos, int len) const{ if (pos < 0) pos = 0; else if (pos >= (int) size()) pos = size(); if (len < 0) len = size(); if (pos + len >= (int) size()) len = size() - pos; UString::Rep *newRep = Rep::create(rep, pos, len); UString result(newRep); newRep->deref(); return result;}void UString::attach(Rep *r){ rep = r; rep->ref();}void UString::detach(){ if (rep->rc > 1 || rep->baseString) { int l = size(); UChar *n = static_cast<UChar *>(malloc(sizeof(UChar) * l)); memcpy(n, data(), l * sizeof(UChar)); release(); rep = Rep::create(n, l); }}void UString::release(){ rep->deref();}bool KJS::operator==(const UString& s1, const UString& s2){ if (s1.rep->len != s2.rep->len) return false; return (memcmp(s1.rep->data(), s2.rep->data(), s1.rep->len * sizeof(UChar)) == 0);}bool KJS::operator==(const UString& s1, const char *s2){ if (s2 == 0) { return s1.isEmpty(); } const UChar *u = s1.data(); const UChar *uend = u + s1.size(); while (u != uend && *s2) { if (u->uc != (unsigned char)*s2) return false; s2++; u++; } return u == uend && *s2 == 0;}bool KJS::operator<(const UString& s1, const UString& s2){ const int l1 = s1.size(); const int l2 = s2.size(); const int lmin = l1 < l2 ? l1 : l2; const UChar *c1 = s1.data(); const UChar *c2 = s2.data(); int l = 0; while (l < lmin && *c1 == *c2) { c1++; c2++; l++; } if (l < lmin) return (c1->uc < c2->uc); return (l1 < l2);}int KJS::compare(const UString& s1, const UString& s2){ const int l1 = s1.size(); const int l2 = s2.size(); const int lmin = l1 < l2 ? l1 : l2; const UChar *c1 = s1.data(); const UChar *c2 = s2.data(); int l = 0; while (l < lmin && *c1 == *c2) { c1++; c2++; l++; } if (l < lmin) return (c1->uc > c2->uc) ? 1 : -1; if (l1 == l2) { return 0; } return (l1 < l2) ? 1 : -1;}inline int inlineUTF8SequenceLengthNonASCII(char b0){ if ((b0 & 0xC0) != 0xC0) return 0; if ((b0 & 0xE0) == 0xC0) return 2; if ((b0 & 0xF0) == 0xE0) return 3; if ((b0 & 0xF8) == 0xF0) return 4; return 0;}int UTF8SequenceLengthNonASCII(char b0){ return inlineUTF8SequenceLengthNonASCII(b0);}inline int inlineUTF8SequenceLength(char b0){ return (b0 & 0x80) == 0 ? 1 : UTF8SequenceLengthNonASCII(b0);}// Given a first byte, gives the length of the UTF-8 sequence it begins.// Returns 0 for bytes that are not legal starts of UTF-8 sequences.// Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF).int UTF8SequenceLength(char b0){ return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);}// Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character.// Only allows Unicode characters (U-00000000 to U-0010FFFF).// Returns -1 if the sequence is not valid (including presence of extra bytes).int decodeUTF8Sequence(const char *sequence){ // Handle 0-byte sequences (never valid). const unsigned char b0 = sequence[0]; const int length = inlineUTF8SequenceLength(b0); if (length == 0) return -1; // Handle 1-byte sequences (plain ASCII). const unsigned char b1 = sequence[1]; if (length == 1) { if (b1) return -1; return b0; } // Handle 2-byte sequences. if ((b1 & 0xC0) != 0x80) return -1; const unsigned char b2 = sequence[2]; if (length == 2) { if (b2) return -1; const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F); if (c < 0x80) return -1; return c; } // Handle 3-byte sequences. if ((b2 & 0xC0) != 0x80) return -1; const unsigned char b3 = sequence[3]; if (length == 3) { if (b3) return -1; const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F); if (c < 0x800) return -1; // UTF-16 surrogates should never appear in UTF-8 data. if (c >= 0xD800 && c <= 0xDFFF) return -1; // Backwards BOM and U+FFFF should never appear in UTF-8 data. if (c == 0xFFFE || c == 0xFFFF) return -1; return c; } // Handle 4-byte sequences. if ((b3 & 0xC0) != 0x80) return -1; const unsigned char b4 = sequence[4]; if (length == 4) { if (b4) return -1; const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F); if (c < 0x10000 || c > 0x10FFFF) return -1; return c; } return -1;}CString UString::UTF8String() const{ // Allocate a buffer big enough to hold all the characters. const int length = size(); const unsigned bufferSize = length * 3; char fixedSizeBuffer[1024]; char *buffer; if (bufferSize > sizeof(fixedSizeBuffer)) { buffer = new char [bufferSize]; } else { buffer = fixedSizeBuffer; } // Convert to runs of 8-bit characters. char *p = buffer; const UChar *d = data(); for (int i = 0; i != length; ++i) { unsigned short c = d[i].unicode(); if (c < 0x80) { *p++ = (char)c; } else if (c < 0x800) { *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set } else if (c >= 0xD800 && c <= 0xDBFF && i < length && d[i+1].uc >= 0xDC00 && d[i+2].uc <= 0xDFFF) { unsigned sc = 0x10000 + (((c & 0x3FF) << 10) | (d[i+1].uc & 0x3FF)); *p++ = (char)((sc >> 18) | 0xF0); // F0 is the 4-byte flag for UTF-8 *p++ = (char)(((sc >> 12) | 0x80) & 0xBF); // next 6 bits, with high bit set *p++ = (char)(((sc >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set *p++ = (char)((sc | 0x80) & 0xBF); // next 6 bits, with high bit set ++i; } else { *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8 *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set } } // Return the result as a C string. CString result(buffer, p - buffer); if (buffer != fixedSizeBuffer) { delete [] buffer; } return result;}struct StringOffset { int offset; int locationInOffsetsArray;};static int compareStringOffsets(const void *a, const void *b){ const StringOffset *oa = static_cast<const StringOffset *>(a); const StringOffset *ob = static_cast<const StringOffset *>(b); if (oa->offset < ob->offset) { return -1; } if (oa->offset > ob->offset) { return +1; } return 0;}const int sortedOffsetsFixedBufferSize = 128;static StringOffset *createSortedOffsetsArray(const int offsets[], int numOffsets, StringOffset sortedOffsetsFixedBuffer[sortedOffsetsFixedBufferSize]){ // Allocate the sorted offsets. StringOffset *sortedOffsets; if (numOffsets <= sortedOffsetsFixedBufferSize) { sortedOffsets = sortedOffsetsFixedBuffer; } else { sortedOffsets = new StringOffset [numOffsets]; } // Copy offsets and sort them. // (Since qsort showed up on profiles, hand code for numbers up to 3.) switch (numOffsets) { case 0: break; case 1: sortedOffsets[0].offset = offsets[0]; sortedOffsets[0].locationInOffsetsArray = 0; break; case 2: { if (offsets[0] <= offsets[1]) { sortedOffsets[0].offset = offsets[0]; sortedOffsets[0].locationInOffsetsArray = 0; sortedOffsets[1].offset = offsets[1]; sortedOffsets[1].locationInOffsetsArray = 1; } else { sortedOffsets[0].offset = offsets[1]; sortedOffsets[0].locationInOffsetsArray = 1; sortedOffsets[1].offset = offsets[0]; sortedOffsets[1].locationInOffsetsArray = 0; } break; } case 3: { int i0, i1, i2; if (offsets[0] <= offsets[1]) { if (offsets[0] <= offsets[2]) { i0 = 0; if (offsets[1] <= offsets[2]) { i1 = 1; i2 = 2; } else { i1 = 2; i2 = 1; } } else { i0 = 2; i1 = 0; i2 = 1; } } else { if (offsets[1] <= offsets[2]) { i0 = 1; if (offsets[0] <= offsets[2]) { i1 = 0; i2 = 2; } else { i1 = 2; i2 = 0; } } else { i0 = 2; i1 = 1; i2 = 0; } } sortedOffsets[0].offset = offsets[i0]; sortedOffsets[0].locationInOffsetsArray = i0; sortedOffsets[1].offset = offsets[i1]; sortedOffsets[1].locationInOffsetsArray = i1; sortedOffsets[2].offset = offsets[i2]; sortedOffsets[2].locationInOffsetsArray = i2; break; } default: for (int i = 0; i != numOffsets; ++i) { sortedOffsets[i].offset = offsets[i]; sortedOffsets[i].locationInOffsetsArray = i; } qsort(sortedOffsets, numOffsets, sizeof(StringOffset), compareStringOffsets); } return sortedOffsets;}// Note: This function assumes valid UTF-8.// It can even go into an infinite loop if the passed in string is not valid UTF-8.void convertUTF16OffsetsToUTF8Offsets(const char *s, int *offsets, int numOffsets){ // Allocate buffer. StringOffset fixedBuffer[sortedOffsetsFixedBufferSize]; StringOffset *sortedOffsets = createSortedOffsetsArray(offsets, numOffsets, fixedBuffer); // Walk through sorted offsets and string, adjusting all the offests. // Offsets that are off the ends of the string map to the edges of the string. int UTF16Offset = 0; const char *p = s; for (int oi = 0; oi != numOffsets; ++oi) { const int nextOffset = sortedOffsets[oi].offset; while (*p && UTF16Offset < nextOffset) { // Skip to the next character. const int sequenceLength = inlineUTF8SequenceLength(*p); assert(sequenceLength >= 1 && sequenceLength <= 4); p += sequenceLength; // Characters that take a 4 byte sequence in UTF-8 take two bytes in UTF-16. UTF16Offset += sequenceLength < 4 ? 1 : 2; } offsets[sortedOffsets[oi].locationInOffsetsArray] = p - s; } // Free buffer. if (sortedOffsets != fixedBuffer) { delete [] sortedOffsets; }}// Note: This function assumes valid UTF-8.// It can even go into an infinite loop if the passed in string is not valid UTF-8.void convertUTF8OffsetsToUTF16Offsets(const char *s, int *offsets, int numOffsets){ // Allocate buffer. StringOffset fixedBuffer[sortedOffsetsFixedBufferSize]; StringOffset *sortedOffsets = createSortedOffsetsArray(offsets, numOffsets, fixedBuffer); // Walk through sorted offsets and string, adjusting all the offests. // Offsets that are off the end of the string map to the edges of the string. int UTF16Offset = 0; const char *p = s; for (int oi = 0; oi != numOffsets; ++oi) { const int nextOffset = sortedOffsets[oi].offset; while (*p && (p - s) < nextOffset) { // Skip to the next character. const int sequenceLength = inlineUTF8SequenceLength(*p); assert(sequenceLength >= 1 && sequenceLength <= 4); p += sequenceLength; // Characters that take a 4 byte sequence in UTF-8 take two bytes in UTF-16. UTF16Offset += sequenceLength < 4 ? 1 : 2; } offsets[sortedOffsets[oi].locationInOffsetsArray] = UTF16Offset; } // Free buffer. if (sortedOffsets != fixedBuffer) { delete [] sortedOffsets; }}} // namespace KJS
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -