📄 unicode.cc.svn-base
字号:
// Copyright 2007-2008 the V8 project authors. All rights reserved.// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.// * Redistributions in binary form must reproduce the above// copyright notice, this list of conditions and the following// disclaimer in the documentation and/or other materials provided// with the distribution.// * Neither the name of Google Inc. nor the names of its// contributors may be used to endorse or promote products derived// from this software without specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.//// This file was generated at 2008-09-08 11:13:45.862026#include "unicode-inl.h"#include <cstdlib>#include <cstdio>namespace unibrow {/** * \file * Implementations of functions for working with unicode. */typedef signed short int16_t; // NOLINTtypedef unsigned short uint16_t; // NOLINT// All access to the character table should go through this function.template <int D>static inline uchar TableGet(const uint16_t* table, int index) { return table[D * index];}static inline uchar GetEntry(uint16_t entry) { return entry & 0x7fff;}static inline bool IsStart(uint16_t entry) { return (entry & (1 << 15)) != 0;}/** * Look up a character in the unicode table using a mix of binary and * interpolation search. For a uniformly distributed array * interpolation search beats binary search by a wide margin. However, * in this case interpolation search degenerates because of some very * high values in the lower end of the table so this function uses a * combination. The average number of steps to look up the information * about a character is around 10, slightly higher if there is no * information available about the character. */static bool LookupPredicate(const uint16_t* table, uint16_t size, uchar chr) { static const int kEntryDist = 1; uint16_t value = chr & 0x7fff; unsigned int low = 0; unsigned int high = size - 1; while (high != low) { unsigned int mid = low + ((high - low) >> 1); uchar current_value = GetEntry(TableGet<kEntryDist>(table, mid)); // If we've found an entry less than or equal to this one, and the // next one is not also less than this one, we've arrived. if ((current_value <= value) && (mid + 1 == size || GetEntry(TableGet<kEntryDist>(table, mid + 1)) > value)) { low = mid; break; } else if (current_value < value) { low = mid + 1; } else if (current_value > value) { // If we've just checked the bottom-most value and it's not // the one we're looking for, we're done. if (mid == 0) break; high = mid - 1; } } uint16_t field = TableGet<kEntryDist>(table, low); return (GetEntry(field) == value) || (GetEntry(field) < value && IsStart(field));}struct MultiCharacterSpecialCase { uint16_t length; uchar chars[kMaxCaseConvertedSize];};// Look up the mapping for the given character in the specified table,// which is of the specified length and uses the specified special case// mapping for multi-char mappings. The next parameter is the character// following the one to map. The result will be written in to the result// buffer and the number of characters written will be returned. Finally,// if the allow_caching_ptr is non-null then false will be stored in// it if the result contains multiple characters or depends on the// context.static int LookupMapping(const uint16_t* table, uint16_t size, const MultiCharacterSpecialCase* multi_chars, uchar chr, uchar next, uchar* result, bool* allow_caching_ptr) { static const int kEntryDist = 2; uint16_t value = chr & 0x7fff; unsigned int low = 0; unsigned int high = size - 1; while (high != low) { unsigned int mid = low + ((high - low) >> 1); uchar current_value = GetEntry(TableGet<kEntryDist>(table, mid)); // If we've found an entry less than or equal to this one, and the next one // is not also less than this one, we've arrived. if ((current_value <= value) && (mid + 1 == size || GetEntry(TableGet<kEntryDist>(table, mid + 1)) > value)) { low = mid; break; } else if (current_value < value) { low = mid + 1; } else if (current_value > value) { // If we've just checked the bottom-most value and it's not // the one we're looking for, we're done. if (mid == 0) break; high = mid - 1; } } uint16_t field = TableGet<kEntryDist>(table, low); bool found = (GetEntry(field) == value) || (GetEntry(field) < value && IsStart(field)); if (found) { int16_t value = table[2 * low + 1]; if (value == 0) { // 0 means not present return 0; } else if ((value & 3) == 0) { // Low bits 0 means a constant offset from the given character. result[0] = chr + (value >> 2); return 1; } else if ((value & 3) == 1) { // Low bits 1 means a special case mapping if (allow_caching_ptr) *allow_caching_ptr = false; const MultiCharacterSpecialCase& mapping = multi_chars[value >> 2]; for (int i = 0; i < mapping.length; i++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -