📄 ustring.cpp

📁 khtml在gtk上的移植版本
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
  unsigned i = toStrictUInt32(ok);  if (i >= 0xFFFFFFFFU && ok)    *ok = false;  return i;}int UString::find(const UString &f, int pos) const{  int sz = size();  int fsz = f.size();  if (sz < fsz)    return -1;  if (pos < 0)    pos = 0;  if (fsz == 0)    return pos;  const UChar *end = data() + sz - fsz;  long fsizeminusone = (fsz - 1) * sizeof(UChar);  const UChar *fdata = f.data();  for (const UChar *c = data() + pos; c <= end; c++)    if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))      return (c-data());  return -1;}int UString::find(UChar ch, int pos) const{  if (pos < 0)    pos = 0;  const UChar *end = data() + size();  for (const UChar *c = data() + pos; c < end; c++)    if (*c == ch)      return (c-data());  return -1;}int UString::rfind(const UString &f, int pos) const{  int sz = size();  int fsz = f.size();  if (sz < fsz)    return -1;  if (pos < 0)    pos = 0;  if (pos > sz - fsz)    pos = sz - fsz;  if (fsz == 0)    return pos;  long fsizeminusone = (fsz - 1) * sizeof(UChar);  const UChar *fdata = f.data();  for (const UChar *c = data() + pos; c >= data(); c--) {    if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))      return (c-data());  }  return -1;}int UString::rfind(UChar ch, int pos) const{  if (isEmpty())    return -1;  if (pos + 1 >= size())    pos = size() - 1;  for (const UChar *c = data() + pos; c >= data(); c--) {    if (*c == ch)      return (c-data());  }  return -1;}UString UString::substr(int pos, int len) const{  if (pos < 0)    pos = 0;  else if (pos >= (int) size())    pos = size();  if (len < 0)    len = size();  if (pos + len >= (int) size())    len = size() - pos;  UString::Rep *newRep = Rep::create(rep, pos, len);  UString result(newRep);  newRep->deref();  return result;}void UString::attach(Rep *r){  rep = r;  rep->ref();}void UString::detach(){  if (rep->rc > 1 || rep->baseString) {    int l = size();    UChar *n = static_cast<UChar *>(malloc(sizeof(UChar) * l));    memcpy(n, data(), l * sizeof(UChar));    release();    rep = Rep::create(n, l);  }}void UString::release(){  rep->deref();}bool KJS::operator==(const UString& s1, const UString& s2){  if (s1.rep->len != s2.rep->len)    return false;  return (memcmp(s1.rep->data(), s2.rep->data(),		 s1.rep->len * sizeof(UChar)) == 0);}bool KJS::operator==(const UString& s1, const char *s2){  if (s2 == 0) {    return s1.isEmpty();  }  const UChar *u = s1.data();  const UChar *uend = u + s1.size();  while (u != uend && *s2) {    if (u->uc != (unsigned char)*s2)      return false;    s2++;    u++;  }  return u == uend && *s2 == 0;}bool KJS::operator<(const UString& s1, const UString& s2){  const int l1 = s1.size();  const int l2 = s2.size();  const int lmin = l1 < l2 ? l1 : l2;  const UChar *c1 = s1.data();  const UChar *c2 = s2.data();  int l = 0;  while (l < lmin && *c1 == *c2) {    c1++;    c2++;    l++;  }  if (l < lmin)    return (c1->uc < c2->uc);  return (l1 < l2);}int KJS::compare(const UString& s1, const UString& s2){  const int l1 = s1.size();  const int l2 = s2.size();  const int lmin = l1 < l2 ? l1 : l2;  const UChar *c1 = s1.data();  const UChar *c2 = s2.data();  int l = 0;  while (l < lmin && *c1 == *c2) {    c1++;    c2++;    l++;  }  if (l < lmin)    return (c1->uc > c2->uc) ? 1 : -1;  if (l1 == l2) {    return 0;  }  return (l1 < l2) ? 1 : -1;}inline int inlineUTF8SequenceLengthNonASCII(char b0){  if ((b0 & 0xC0) != 0xC0)    return 0;  if ((b0 & 0xE0) == 0xC0)    return 2;  if ((b0 & 0xF0) == 0xE0)    return 3;  if ((b0 & 0xF8) == 0xF0)    return 4;  return 0;}int UTF8SequenceLengthNonASCII(char b0){  return inlineUTF8SequenceLengthNonASCII(b0);}inline int inlineUTF8SequenceLength(char b0){  return (b0 & 0x80) == 0 ? 1 : UTF8SequenceLengthNonASCII(b0);}// Given a first byte, gives the length of the UTF-8 sequence it begins.// Returns 0 for bytes that are not legal starts of UTF-8 sequences.// Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF).int UTF8SequenceLength(char b0){  return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);}// Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character.// Only allows Unicode characters (U-00000000 to U-0010FFFF).// Returns -1 if the sequence is not valid (including presence of extra bytes).int decodeUTF8Sequence(const char *sequence){  // Handle 0-byte sequences (never valid).  const unsigned char b0 = sequence[0];  const int length = inlineUTF8SequenceLength(b0);  if (length == 0)    return -1;  // Handle 1-byte sequences (plain ASCII).  const unsigned char b1 = sequence[1];  if (length == 1) {    if (b1)      return -1;    return b0;  }  // Handle 2-byte sequences.  if ((b1 & 0xC0) != 0x80)    return -1;  const unsigned char b2 = sequence[2];  if (length == 2) {    if (b2)      return -1;    const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F);    if (c < 0x80)      return -1;    return c;  }  // Handle 3-byte sequences.  if ((b2 & 0xC0) != 0x80)    return -1;  const unsigned char b3 = sequence[3];  if (length == 3) {    if (b3)      return -1;    const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);    if (c < 0x800)      return -1;    // UTF-16 surrogates should never appear in UTF-8 data.    if (c >= 0xD800 && c <= 0xDFFF)      return -1;    // Backwards BOM and U+FFFF should never appear in UTF-8 data.    if (c == 0xFFFE || c == 0xFFFF)      return -1;    return c;  }  // Handle 4-byte sequences.  if ((b3 & 0xC0) != 0x80)    return -1;  const unsigned char b4 = sequence[4];  if (length == 4) {    if (b4)      return -1;    const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);    if (c < 0x10000 || c > 0x10FFFF)      return -1;    return c;  }  return -1;}CString UString::UTF8String() const{  // Allocate a buffer big enough to hold all the characters.  const int length = size();  const unsigned bufferSize = length * 3;  char fixedSizeBuffer[1024];  char *buffer;  if (bufferSize > sizeof(fixedSizeBuffer)) {    buffer = new char [bufferSize];  } else {    buffer = fixedSizeBuffer;  }  // Convert to runs of 8-bit characters.  char *p = buffer;  const UChar *d = data();  for (int i = 0; i != length; ++i) {    unsigned short c = d[i].unicode();    if (c < 0x80) {      *p++ = (char)c;    } else if (c < 0x800) {      *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8      *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set    } else if (c >= 0xD800 && c <= 0xDBFF && i < length && d[i+1].uc >= 0xDC00 && d[i+2].uc <= 0xDFFF) {      unsigned sc = 0x10000 + (((c & 0x3FF) << 10) | (d[i+1].uc & 0x3FF));      *p++ = (char)((sc >> 18) | 0xF0); // F0 is the 4-byte flag for UTF-8      *p++ = (char)(((sc >> 12) | 0x80) & 0xBF); // next 6 bits, with high bit set      *p++ = (char)(((sc >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set      *p++ = (char)((sc | 0x80) & 0xBF); // next 6 bits, with high bit set      ++i;    } else {      *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8      *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set      *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set    }  }  // Return the result as a C string.  CString result(buffer, p - buffer);  if (buffer != fixedSizeBuffer) {    delete [] buffer;  }  return result;}struct StringOffset {    int offset;    int locationInOffsetsArray;};static int compareStringOffsets(const void *a, const void *b){    const StringOffset *oa = static_cast<const StringOffset *>(a);    const StringOffset *ob = static_cast<const StringOffset *>(b);        if (oa->offset < ob->offset) {        return -1;    }    if (oa->offset > ob->offset) {        return +1;    }    return 0;}const int sortedOffsetsFixedBufferSize = 128;static StringOffset *createSortedOffsetsArray(const int offsets[], int numOffsets,    StringOffset sortedOffsetsFixedBuffer[sortedOffsetsFixedBufferSize]){    // Allocate the sorted offsets.    StringOffset *sortedOffsets;    if (numOffsets <= sortedOffsetsFixedBufferSize) {        sortedOffsets = sortedOffsetsFixedBuffer;    } else {        sortedOffsets = new StringOffset [numOffsets];    }    // Copy offsets and sort them.    // (Since qsort showed up on profiles, hand code for numbers up to 3.)    switch (numOffsets) {        case 0:            break;        case 1:            sortedOffsets[0].offset = offsets[0];            sortedOffsets[0].locationInOffsetsArray = 0;            break;        case 2: {            if (offsets[0] <= offsets[1]) {                sortedOffsets[0].offset = offsets[0];                sortedOffsets[0].locationInOffsetsArray = 0;                sortedOffsets[1].offset = offsets[1];                sortedOffsets[1].locationInOffsetsArray = 1;            } else {                sortedOffsets[0].offset = offsets[1];                sortedOffsets[0].locationInOffsetsArray = 1;                sortedOffsets[1].offset = offsets[0];                sortedOffsets[1].locationInOffsetsArray = 0;            }            break;        }        case 3: {            int i0, i1, i2;            if (offsets[0] <= offsets[1]) {                if (offsets[0] <= offsets[2]) {                    i0 = 0;                    if (offsets[1] <= offsets[2]) {                        i1 = 1; i2 = 2;                    } else {                        i1 = 2; i2 = 1;                    }                } else {                    i0 = 2; i1 = 0; i2 = 1;                }            } else {                if (offsets[1] <= offsets[2]) {                    i0 = 1;                    if (offsets[0] <= offsets[2]) {                        i1 = 0; i2 = 2;                    } else {                        i1 = 2; i2 = 0;                    }                } else {                    i0 = 2; i1 = 1; i2 = 0;                }            }            sortedOffsets[0].offset = offsets[i0];            sortedOffsets[0].locationInOffsetsArray = i0;            sortedOffsets[1].offset = offsets[i1];            sortedOffsets[1].locationInOffsetsArray = i1;            sortedOffsets[2].offset = offsets[i2];            sortedOffsets[2].locationInOffsetsArray = i2;            break;        }        default:            for (int i = 0; i != numOffsets; ++i) {                sortedOffsets[i].offset = offsets[i];                sortedOffsets[i].locationInOffsetsArray = i;            }            qsort(sortedOffsets, numOffsets, sizeof(StringOffset), compareStringOffsets);    }    return sortedOffsets;}// Note: This function assumes valid UTF-8.// It can even go into an infinite loop if the passed in string is not valid UTF-8.void convertUTF16OffsetsToUTF8Offsets(const char *s, int *offsets, int numOffsets){    // Allocate buffer.    StringOffset fixedBuffer[sortedOffsetsFixedBufferSize];    StringOffset *sortedOffsets = createSortedOffsetsArray(offsets, numOffsets, fixedBuffer);    // Walk through sorted offsets and string, adjusting all the offests.    // Offsets that are off the ends of the string map to the edges of the string.    int UTF16Offset = 0;    const char *p = s;    for (int oi = 0; oi != numOffsets; ++oi) {        const int nextOffset = sortedOffsets[oi].offset;        while (*p && UTF16Offset < nextOffset) {            // Skip to the next character.            const int sequenceLength = inlineUTF8SequenceLength(*p);            assert(sequenceLength >= 1 && sequenceLength <= 4);            p += sequenceLength;            // Characters that take a 4 byte sequence in UTF-8 take two bytes in UTF-16.            UTF16Offset += sequenceLength < 4 ? 1 : 2;        }        offsets[sortedOffsets[oi].locationInOffsetsArray] = p - s;    }    // Free buffer.    if (sortedOffsets != fixedBuffer) {        delete [] sortedOffsets;    }}// Note: This function assumes valid UTF-8.// It can even go into an infinite loop if the passed in string is not valid UTF-8.void convertUTF8OffsetsToUTF16Offsets(const char *s, int *offsets, int numOffsets){    // Allocate buffer.    StringOffset fixedBuffer[sortedOffsetsFixedBufferSize];    StringOffset *sortedOffsets = createSortedOffsetsArray(offsets, numOffsets, fixedBuffer);    // Walk through sorted offsets and string, adjusting all the offests.    // Offsets that are off the end of the string map to the edges of the string.    int UTF16Offset = 0;    const char *p = s;    for (int oi = 0; oi != numOffsets; ++oi) {        const int nextOffset = sortedOffsets[oi].offset;        while (*p && (p - s) < nextOffset) {            // Skip to the next character.            const int sequenceLength = inlineUTF8SequenceLength(*p);            assert(sequenceLength >= 1 && sequenceLength <= 4);            p += sequenceLength;            // Characters that take a 4 byte sequence in UTF-8 take two bytes in UTF-16.            UTF16Offset += sequenceLength < 4 ? 1 : 2;        }        offsets[sortedOffsets[oi].locationInOffsetsArray] = UTF16Offset;    }    // Free buffer.    if (sortedOffsets != fixedBuffer) {        delete [] sortedOffsets;    }}} // namespace KJS
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -