scanner.cc.svn-base

来自「Google浏览器V8内核代码」· SVN-BASE 代码 · 共 827 行 · 第 1/2 页
SVN-BASE
827 行
// Copyright 2006-2008 the V8 project authors. All rights reserved.// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met:////     * Redistributions of source code must retain the above copyright//       notice, this list of conditions and the following disclaimer.//     * Redistributions in binary form must reproduce the above//       copyright notice, this list of conditions and the following//       disclaimer in the documentation and/or other materials provided//       with the distribution.//     * Neither the name of Google Inc. nor the names of its//       contributors may be used to endorse or promote products derived//       from this software without specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.#include "v8.h"#include "ast.h"#include "scanner.h"namespace v8 { namespace internal {// ----------------------------------------------------------------------------// Character predicatesunibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;// ----------------------------------------------------------------------------// UTF8BufferUTF8Buffer::UTF8Buffer() : data_(NULL) {  Initialize(NULL, 0);}UTF8Buffer::~UTF8Buffer() {  DeleteArray(data_);}void UTF8Buffer::Initialize(char* src, int length) {  DeleteArray(data_);  data_ = src;  size_ = length;  Reset();}void UTF8Buffer::AddChar(uc32 c) {  const int min_size = 1024;  if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) {    int new_size = size_ * 2;    if (new_size < min_size) {      new_size = min_size;    }    char* new_data = NewArray<char>(new_size);    memcpy(new_data, data_, pos_);    DeleteArray(data_);    data_ = new_data;    size_ = new_size;  }  if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) {    data_[pos_++] = c;  // common case: 7bit ASCII  } else {    pos_ += unibrow::Utf8::Encode(&data_[pos_], c);  }  ASSERT(pos_ <= size_);}// 
----------------------------------------------------------------------------// UTF16BufferUTF16Buffer::UTF16Buffer()  : pos_(0),    pushback_buffer_(0),    last_(0),    stream_(NULL) { }void UTF16Buffer::Initialize(Handle<String> data,                             unibrow::CharacterStream* input) {  data_ = data;  pos_ = 0;  stream_ = input;}Handle<String> UTF16Buffer::SubString(int start, int end) {  return internal::SubString(data_, start, end);}void UTF16Buffer::PushBack(uc32 ch) {  pushback_buffer()->Add(last_);  last_ = ch;  pos_--;}uc32 UTF16Buffer::Advance() {  // NOTE: It is of importance to Persian / Farsi resources that we do  // *not* strip format control characters in the scanner; see  //  //    https://bugzilla.mozilla.org/show_bug.cgi?id=274152  //  // So, even though ECMA-262, section 7.1, page 11, dictates that we  // must remove Unicode format-control characters, we do not. This is  // in line with how IE and SpiderMonkey handles it.  if (!pushback_buffer()->is_empty()) {    pos_++;    return last_ = pushback_buffer()->RemoveLast();  } else if (stream_->has_more()) {    pos_++;    uc32 next = stream_->GetNext();    return last_ = next;  } else {    // note: currently the following increment is necessary to avoid a    // test-parser problem!    pos_++;    return last_ = static_cast<uc32>(-1);  }}void UTF16Buffer::SeekForward(int pos) {  pos_ = pos;  ASSERT(pushback_buffer()->is_empty());  stream_->Seek(pos);}// ----------------------------------------------------------------------------// ScannerScanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {  Token::Initialize();}void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,    int position) {  // Initialize the source buffer.  
source_.Initialize(source, stream);  position_ = position;  // Reset literals buffer  literals_.Reset();  // Set c0_ (one character ahead)  ASSERT(kCharacterLookaheadBufferSize == 1);  Advance();  // Skip initial whitespace (allowing HTML comment ends) and scan  // first token.  SkipWhiteSpace(true);  Scan();}Handle<String> Scanner::SubString(int start, int end) {  return source_.SubString(start - position_, end - position_);}Token::Value Scanner::Next() {  // BUG 1215673: Find a thread safe way to set a stack limit in  // pre-parse mode. Otherwise, we cannot safely pre-parse from other  // threads.  current_ = next_;  // Check for stack-overflow before returning any tokens.  StackLimitCheck check;  if (check.HasOverflowed()) {    stack_overflow_ = true;    next_.token = Token::ILLEGAL;  } else {    Scan();  }  return current_.token;}void Scanner::StartLiteral() {  next_.literal_pos = literals_.pos();}void Scanner::AddChar(uc32 c) {  literals_.AddChar(c);}void Scanner::TerminateLiteral() {  next_.literal_end = literals_.pos();  AddChar(0);}void Scanner::AddCharAdvance() {  AddChar(c0_);  Advance();}void Scanner::Advance() {  c0_ = source_.Advance();}void Scanner::PushBack(uc32 ch) {  source_.PushBack(ch);  c0_ = ch;}void Scanner::SkipWhiteSpace(bool initial) {  has_line_terminator_before_next_ = initial;  while (true) {    while (kIsWhiteSpace.get(c0_)) {      // IsWhiteSpace() includes line terminators!      if (kIsLineTerminator.get(c0_))        // Ignore line terminators, but remember them. This is necessary        // for automatic semicolon insertion.        has_line_terminator_before_next_ = true;      Advance();    }    // If there is an HTML comment end '-->' at the beginning of a    // line (with only whitespace in front of it), we treat the rest    // of the line as a comment. This is in line with the way    // SpiderMonkey handles it.    
if (c0_ == '-' && has_line_terminator_before_next_) {      Advance();      if (c0_ == '-') {        Advance();        if (c0_ == '>') {          // Treat the rest of the line as a comment.          SkipSingleLineComment();          // Continue skipping white space after the comment.          continue;        }        PushBack('-');  // undo Advance()      }      PushBack('-');  // undo Advance()    }    return;  }}Token::Value Scanner::SkipSingleLineComment() {  Advance();  // The line terminator at the end of the line is not considered  // to be part of the single-line comment; it is recognized  // separately by the lexical grammar and becomes part of the  // stream of input elements for the syntactic grammar (see  // ECMA-262, section 7.4, page 12).  while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {    Advance();  }  return Token::COMMENT;}Token::Value Scanner::SkipMultiLineComment() {  ASSERT(c0_ == '*');  Advance();  while (c0_ >= 0) {    char ch = c0_;    Advance();    // If we have reached the end of the multi-line comment, we    // consume the '/' and insert a whitespace. This way all    // multi-line comments are treated as whitespace - even the ones    // containing line terminators. This contradicts ECMA-262, section    // 7.4, page 12, that says that multi-line comments containing    // line terminators should be treated as a line terminator, but it    // matches the behaviour of SpiderMonkey and KJS.    if (ch == '*' && c0_ == '/') {      c0_ = ' ';      return Token::COMMENT;    }  }  // Unterminated multi-line comment.  return Token::ILLEGAL;}Token::Value Scanner::ScanHtmlComment() {  // Check for <!-- comments.  
ASSERT(c0_ == '!');  Advance();  if (c0_ == '-') {    Advance();    if (c0_ == '-') return SkipSingleLineComment();    PushBack('-');  // undo Advance()  }  PushBack('!');  // undo Advance()  ASSERT(c0_ == '!');  return Token::LT;}void Scanner::Scan() {  Token::Value token;  bool has_line_terminator = false;  do {    SkipWhiteSpace(has_line_terminator);    // Remember the line terminator in previous loop    has_line_terminator = has_line_terminator_before_next();    // Remember the position of the next token    next_.location.beg_pos = source_pos();    token = ScanToken();  } while (token == Token::COMMENT);  next_.location.end_pos = source_pos();  next_.token = token;}void Scanner::SeekForward(int pos) {  source_.SeekForward(pos - 1);  Advance();  Scan();}uc32 Scanner::ScanHexEscape(uc32 c, int length) {  ASSERT(length <= 4);  // prevent overflow  uc32 digits[4];  uc32 x = 0;  for (int i = 0; i < length; i++) {    digits[i] = c0_;    int d = HexValue(c0_);    if (d < 0) {      // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes      // should be illegal, but other JS VMs just return the      // non-escaped version of the original character.      // Push back digits read, except the last one (in c0_).      for (int j = i-1; j >= 0; j--) {        PushBack(digits[j]);      }      return c;    }    x = x * 16 + d;    Advance();  }  return x;}// Octal escapes of the forms '\0xx' and '\xxx' are not a part of// ECMA-262. Other JS VMs support them.uc32 Scanner::ScanOctalEscape(uc32 c, int length) {  uc32 x = c - '0';  for (int i = 0; i < length; i++) {    int d = c0_ - '0';    if (d < 0 || d > 7) break;    int nx = x * 8 + d;    if (nx >= 256) break;    x = nx;    Advance();  }  return x;}void Scanner::ScanEscape() {  uc32 c = c0_;  Advance();  // Skip escaped newlines.  if (kIsLineTerminator.get(c)) {    // Allow CR+LF newlines in multiline string literals.    
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();    // Allow LF+CR newlines in multiline string literals.    if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();    return;  }  switch (c) {    case '\'':  // fall through
scanner.cc.svn-base - 源码说明

本页面展示了「Google浏览器V8内核代码」中的 scanner.cc.svn-base 源码文件（即 scanner.cc 的 Subversion 基准副本），采用 C++ 编程语言编写，共 827 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Google相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?