unicode.h.svn-base

来自「Google浏览器V8内核代码」· SVN-BASE 代码 · 共 283 行
SVN-BASE
283 行
// Copyright 2007-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation.  The guard name is kept as-is because other files may
// test this macro; rename only after checking every user.
#ifndef __UNIBROW_H__
#define __UNIBROW_H__

#include <sys/types.h>

/**
 * \file
 * Definitions and convenience functions for working with unicode.
 */

namespace unibrow {

/** A single character value (code point). */
typedef unsigned int uchar;
/** A single unit of encoded data. */
typedef unsigned char byte;

/**
 * The max length of the result of converting the case of a single
 * character.
 */
static const int kMaxCaseConvertedSize = 3;

/**
 * A table of `size` cached (code point, bool) entries associated with the
 * predicate type T.
 *
 * Only the declarations of get() and CalculateValue() are visible in this
 * header; presumably get() consults entries_ before falling back to
 * CalculateValue() -- confirm against the definitions.
 */
template <class T, int size = 256>
class Predicate {
 public:
  inline Predicate() { }
  inline bool get(uchar c);

 private:
  friend class Test;
  bool CalculateValue(uchar c);

  // One cache slot: a code point together with a boolean value.
  struct CacheEntry {
    inline CacheEntry() : code_point_(0), value_(false) { }
    inline CacheEntry(uchar code_point, bool value)
      : code_point_(code_point),
        value_(value) { }
    // 21 bits hold every value up to 0x1fffff.
    uchar code_point_ : 21;
    bool value_ : 1;
  };

  static const int kSize = size;
  // NOTE(review): kSize - 1 only works as a bit mask when size is a power
  // of two -- confirm how the definitions use it.
  static const int kMask = kSize - 1;
  CacheEntry entries_[kSize];
};

// A cache used in case conversion.  It caches the value for characters
// that either have no mapping or map to a single character independent
// of context.  Characters that map to more than one character or that
// map differently depending on context are always looked up.
template <class T, int size = 256>
class Mapping {
 public:
  inline Mapping() { }
  inline int get(uchar c, uchar n, uchar* result);

 private:
  friend class Test;
  int CalculateValue(uchar c, uchar n, uchar* result);

  // One cache slot: a code point together with a signed offset.
  struct CacheEntry {
    inline CacheEntry() : code_point_(0), offset_(0) { }
    inline CacheEntry(uchar code_point, signed offset)
      : code_point_(code_point),
        offset_(offset) { }
    // 21 bits hold every value up to 0x1fffff.
    uchar code_point_ : 21;
    // 11 signed bits: representable range is [-1024, 1023].
    signed offset_ : 11;
  };

  static const int kSize = size;
  // NOTE(review): kSize - 1 only works as a bit mask when size is a power
  // of two -- confirm how the definitions use it.
  static const int kMask = kSize - 1;
  CacheEntry entries_[kSize];
};

/**
 * Holder for static information about the unicode data.  Everything is
 * private; access is granted to the friend class Test only.
 */
class UnicodeData {
 private:
  friend class Test;
  static int GetByteCount();
  static uchar kMaxCodePoint;
};

// --- U t f   8 ---

/**
 * A (data, length) pair.  Data is copied by value; this class neither
 * allocates nor releases anything.
 */
template <typename Data>
class Buffer {
 public:
  inline Buffer(Data data, unsigned length) : data_(data), length_(length) { }
  inline Buffer() : data_(0), length_(0) { }
  // The accessors do not modify the buffer, so they are const-qualified and
  // usable through const objects and references.
  Data data() const { return data_; }
  unsigned length() const { return length_; }

 private:
  Data data_;
  unsigned length_;
};

/**
 * Static helpers and constants for UTF-8 encoded data.  All member functions
 * are defined outside this header.
 */
class Utf8 {
 public:
  static inline uchar Length(uchar chr);
  static inline unsigned Encode(char* out, uchar c);
  static const byte* ReadBlock(Buffer<const char*> str, byte* buffer,
      unsigned capacity, unsigned* chars_read, unsigned* offset);

  // U+FFFD, the Unicode replacement character.
  static const uchar kBadChar = 0xFFFD;
  static const unsigned kMaxEncodedSize   = 4;
  // Largest values that fit in a 1-, 2-, 3- and 4-byte UTF-8 sequence.
  static const unsigned kMaxOneByteChar   = 0x7f;
  static const unsigned kMaxTwoByteChar   = 0x7ff;
  static const unsigned kMaxThreeByteChar = 0xffff;
  static const unsigned kMaxFourByteChar  = 0x1fffff;

 private:
  template <unsigned s> friend class Utf8InputBuffer;
  friend class Test;
  static inline uchar ValueOf(const byte* str,
                              unsigned length,
                              unsigned* cursor);
  static uchar CalculateValue(const byte* str,
                              unsigned length,
                              unsigned* cursor);
};

// --- C h a r a c t e r   S t r e a m ---

/**
 * Abstract stream of characters.  Subclasses must implement Rewind() and
 * FillBuffer().  The data members are not initialized by this class; no
 * constructor is declared here.
 */
class CharacterStream {
 public:
  inline uchar GetNext();
  // True while the current buffer still holds characters.
  inline bool has_more() const { return remaining_ != 0; }
  // Note that default implementation is not efficient.
  virtual void Seek(unsigned);
  unsigned Length();
  virtual ~CharacterStream() { }
  static inline bool EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
      unsigned& offset);
  static inline bool EncodeAsciiCharacter(uchar c, byte* buffer,
      unsigned capacity, unsigned& offset);
  static inline bool EncodeNonAsciiCharacter(uchar c, byte* buffer,
      unsigned capacity, unsigned& offset);
  static inline uchar DecodeCharacter(const byte* buffer, unsigned* offset);
  virtual void Rewind() = 0;

 protected:
  virtual void FillBuffer() = 0;
  // The number of characters left in the current buffer
  unsigned remaining_;
  // The current offset within the buffer
  unsigned cursor_;
  // The buffer containing the decoded characters.
  const byte* buffer_;
};

// --- I n p u t   B u f f e r ---

/**
 * Provides efficient access to encoded characters in strings.  It
 * does so by reading characters one block at a time, rather than one
 * character at a time, which gives string implementations an
 * opportunity to optimize the decoding.
 */
template <class Reader, class Input = Reader*, unsigned kSize = 256>
class InputBuffer : public CharacterStream {
 public:
  virtual void Rewind();
  inline void Reset(Input input);
  // Overrides CharacterStream::Seek.
  void Seek(unsigned position);
  inline void Reset(unsigned position, Input input);

 protected:
  InputBuffer() { }
  explicit InputBuffer(Input input) { Reset(input); }
  virtual void FillBuffer();

  // A custom offset that can be used by the string implementation to
  // mark progress within the encoded string.
  unsigned offset_;
  // The input string
  Input input_;
  // To avoid heap allocation, we keep an internal buffer to which
  // the encoded string can write its characters.  The string
  // implementation is free to decide whether it wants to use this
  // buffer or not.
  byte util_buffer_[kSize];
};

// --- U t f 8   I n p u t   B u f f e r ---

/**
 * An InputBuffer that uses Utf8 as its reader and a Buffer<const char*> as
 * its input; `s` is the size of the internal utility buffer.
 */
template <unsigned s = 256>
class Utf8InputBuffer : public InputBuffer<Utf8, Buffer<const char*>, s> {
 public:
  inline Utf8InputBuffer() { }
  inline Utf8InputBuffer(const char* data, unsigned length);
  // Wraps (data, length) in a Buffer and forwards it to InputBuffer::Reset.
  inline void Reset(const char* data, unsigned length) {
    InputBuffer<Utf8, Buffer<const char*>, s>::Reset(
        Buffer<const char*>(data, length));
  }
};

// Character predicates.  Each struct declares a static Is(c) that is defined
// outside this header; judging by the names, each one tests whether c
// belongs to the named character class -- confirm against the definitions.

struct Uppercase {
  static bool Is(uchar c);
};

struct Lowercase {
  static bool Is(uchar c);
};

struct Letter {
  static bool Is(uchar c);
};

struct Space {
  static bool Is(uchar c);
};

struct Titlecase {
  static bool Is(uchar c);
};

struct Number {
  static bool Is(uchar c);
};

struct DecimalDigit {
  static bool Is(uchar c);
};

struct Ideographic {
  static bool Is(uchar c);
};

struct WhiteSpace {
  static bool Is(uchar c);
};

struct HexDigit {
  static bool Is(uchar c);
};

struct AsciiHexDigit {
  static bool Is(uchar c);
};

struct BidiControl {
  static bool Is(uchar c);
};

struct JoinControl {
  static bool Is(uchar c);
};

struct Dash {
  static bool Is(uchar c);
};

struct Hyphen {
  static bool Is(uchar c);
};

struct LineTerminator {
  static bool Is(uchar c);
};

struct CombiningMark {
  static bool Is(uchar c);
};

struct ConnectorPunctuation {
  static bool Is(uchar c);
};

// Case conversions.  Convert() is defined outside this header.
// NOTE(review): presumably `result` must have room for
// kMaxCaseConvertedSize characters, the return value is the number of
// characters written, and *allow_caching_ptr reports whether the result may
// be stored in a Mapping cache -- confirm against the definitions.

struct ToLowercase {
  static int Convert(uchar c,
                     uchar n,
                     uchar* result,
                     bool* allow_caching_ptr);
};

struct ToUppercase {
  static int Convert(uchar c,
                     uchar n,
                     uchar* result,
                     bool* allow_caching_ptr);
};

}  // namespace unibrow

#endif  // __UNIBROW_H__
unicode.h.svn-base - 源码说明

本页面展示了「Google浏览器V8内核代码」中的 unicode.h.svn-base 源码文件。该文件是 C++ 头文件（.svn-base 是 Subversion 保存的基准副本后缀，并非编程语言），共 283 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Google相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?