📄 unicode.cc.svn-base
字号:
result[i] = mapping.chars[i]; return mapping.length; } else { // Low bits 2 means a really really special case if (allow_caching_ptr) *allow_caching_ptr = false; // The cases of this switch are defined in unicode.py in the // really_special_cases mapping. switch (value >> 2) { case 1: // Really special case 1: upper case sigma. This letter // converts to two different lower case sigmas depending on // whether or not it occurs at the end of a word. if (next != 0 && Letter::Is(next)) { result[0] = 0x03C3; } else { result[0] = 0x03C2; } return 1; default: return 0; } return -1; } } else { return 0; }}uchar Utf8::CalculateValue(const byte* str, unsigned length, unsigned* cursor) { static const uchar kMaxOneByteChar = 0x7F; static const uchar kMaxTwoByteChar = 0x7FF; static const uchar kMaxThreeByteChar = 0xFFFF; static const uchar kMaxFourByteChar = 0x1FFFFF; // We only get called for non-ascii characters. if (length == 1) { *cursor += 1; return kBadChar; } int first = str[0]; int second = str[1] ^ 0x80; if (second & 0xC0) { *cursor += 1; return kBadChar; } if (first < 0xE0) { if (first < 0xC0) { *cursor += 1; return kBadChar; } uchar l = ((first << 6) | second) & kMaxTwoByteChar; if (l <= kMaxOneByteChar) { *cursor += 1; return kBadChar; } *cursor += 2; return l; } if (length == 2) { *cursor += 1; return kBadChar; } int third = str[2] ^ 0x80; if (third & 0xC0) { *cursor += 1; return kBadChar; } if (first < 0xF0) { uchar l = ((((first << 6) | second) << 6) | third) & kMaxThreeByteChar; if (l <= kMaxTwoByteChar) { *cursor += 1; return kBadChar; } *cursor += 3; return l; } if (length == 3) { *cursor += 1; return kBadChar; } int fourth = str[3] ^ 0x80; if (fourth & 0xC0) { *cursor += 1; return kBadChar; } if (first < 0xF8) { uchar l = (((((first << 6 | second) << 6) | third) << 6) | fourth) & kMaxFourByteChar; if (l <= kMaxThreeByteChar) { *cursor += 1; return kBadChar; } *cursor += 4; return l; } *cursor += 1; return kBadChar;}const byte* Utf8::ReadBlock(Buffer<const char*> str, byte* buffer, unsigned capacity, unsigned* chars_read_ptr, unsigned* offset_ptr) { unsigned offset = *offset_ptr; // Bail out early if we've reached the end of the string. if (offset == str.length()) { *chars_read_ptr = 0; return NULL; } const byte* data = reinterpret_cast<const byte*>(str.data()); if (data[offset] <= kMaxOneByteChar) { // The next character is an ascii char so we scan forward over // the following ascii characters and return the next pure ascii // substring const byte* result = data + offset; offset++; while ((offset < str.length()) && (data[offset] <= kMaxOneByteChar)) offset++; *chars_read_ptr = offset - *offset_ptr; *offset_ptr = offset; return result; } else { // The next character is non-ascii so we just fill the buffer unsigned cursor = 0; unsigned chars_read = 0; while (offset < str.length()) { uchar c = data[offset]; if (c <= kMaxOneByteChar) { // Fast case for ascii characters if (!CharacterStream::EncodeAsciiCharacter(c, buffer, capacity, cursor)) break; offset += 1; } else { unsigned chars = 0; c = Utf8::ValueOf(data + offset, str.length() - offset, &chars); if (!CharacterStream::EncodeNonAsciiCharacter(c, buffer, capacity, cursor)) break; offset += chars; } chars_read++; } *offset_ptr = offset; *chars_read_ptr = chars_read; return buffer; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -