📄 scanner.cc.svn-base
字号:
// Copyright 2006-2008 the V8 project authors. All rights reserved.// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.// * Redistributions in binary form must reproduce the above// copyright notice, this list of conditions and the following// disclaimer in the documentation and/or other materials provided// with the distribution.// * Neither the name of Google Inc. nor the names of its// contributors may be used to endorse or promote products derived// from this software without specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.#include "v8.h"#include "ast.h"#include "scanner.h"namespace v8 { namespace internal {// ----------------------------------------------------------------------------// Character predicatesunibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;// ----------------------------------------------------------------------------// UTF8BufferUTF8Buffer::UTF8Buffer() : data_(NULL) { Initialize(NULL, 0);}UTF8Buffer::~UTF8Buffer() { DeleteArray(data_);}void UTF8Buffer::Initialize(char* src, int length) { DeleteArray(data_); data_ = src; size_ = length; Reset();}void UTF8Buffer::AddChar(uc32 c) { const int min_size = 1024; if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) { int new_size = size_ * 2; if (new_size < min_size) { new_size = min_size; } char* new_data = NewArray<char>(new_size); memcpy(new_data, data_, pos_); DeleteArray(data_); data_ = new_data; size_ = new_size; } if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) { data_[pos_++] = c; // common case: 7bit ASCII } else { pos_ += unibrow::Utf8::Encode(&data_[pos_], c); } ASSERT(pos_ <= size_);}// ----------------------------------------------------------------------------// UTF16BufferUTF16Buffer::UTF16Buffer() : pos_(0), pushback_buffer_(0), last_(0), stream_(NULL) { }void UTF16Buffer::Initialize(Handle<String> data, unibrow::CharacterStream* input) { data_ = data; pos_ = 0; stream_ = input;}Handle<String> UTF16Buffer::SubString(int start, int end) { return internal::SubString(data_, start, end);}void UTF16Buffer::PushBack(uc32 ch) { pushback_buffer()->Add(last_); last_ = ch; pos_--;}uc32 UTF16Buffer::Advance() { // NOTE: It is of importance to Persian / Farsi resources that we do // *not* strip format control characters in the scanner; see // // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 // // So, even though ECMA-262, section 7.1, page 11, dictates that we // must remove Unicode format-control characters, we do not. This is // in line with how IE and SpiderMonkey handles it. if (!pushback_buffer()->is_empty()) { pos_++; return last_ = pushback_buffer()->RemoveLast(); } else if (stream_->has_more()) { pos_++; uc32 next = stream_->GetNext(); return last_ = next; } else { // note: currently the following increment is necessary to avoid a // test-parser problem! pos_++; return last_ = static_cast<uc32>(-1); }}void UTF16Buffer::SeekForward(int pos) { pos_ = pos; ASSERT(pushback_buffer()->is_empty()); stream_->Seek(pos);}// ----------------------------------------------------------------------------// ScannerScanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { Token::Initialize();}void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, int position) { // Initialize the source buffer. source_.Initialize(source, stream); position_ = position; // Reset literals buffer literals_.Reset(); // Set c0_ (one character ahead) ASSERT(kCharacterLookaheadBufferSize == 1); Advance(); // Skip initial whitespace (allowing HTML comment ends) and scan // first token. SkipWhiteSpace(true); Scan();}Handle<String> Scanner::SubString(int start, int end) { return source_.SubString(start - position_, end - position_);}Token::Value Scanner::Next() { // BUG 1215673: Find a thread safe way to set a stack limit in // pre-parse mode. Otherwise, we cannot safely pre-parse from other // threads. current_ = next_; // Check for stack-overflow before returning any tokens. StackLimitCheck check; if (check.HasOverflowed()) { stack_overflow_ = true; next_.token = Token::ILLEGAL; } else { Scan(); } return current_.token;}void Scanner::StartLiteral() { next_.literal_pos = literals_.pos();}void Scanner::AddChar(uc32 c) { literals_.AddChar(c);}void Scanner::TerminateLiteral() { next_.literal_end = literals_.pos(); AddChar(0);}void Scanner::AddCharAdvance() { AddChar(c0_); Advance();}void Scanner::Advance() { c0_ = source_.Advance();}void Scanner::PushBack(uc32 ch) { source_.PushBack(ch); c0_ = ch;}void Scanner::SkipWhiteSpace(bool initial) { has_line_terminator_before_next_ = initial; while (true) { while (kIsWhiteSpace.get(c0_)) { // IsWhiteSpace() includes line terminators! if (kIsLineTerminator.get(c0_)) // Ignore line terminators, but remember them. This is necessary // for automatic semicolon insertion. has_line_terminator_before_next_ = true; Advance(); } // If there is an HTML comment end '-->' at the beginning of a // line (with only whitespace in front of it), we treat the rest // of the line as a comment. This is in line with the way // SpiderMonkey handles it. if (c0_ == '-' && has_line_terminator_before_next_) { Advance(); if (c0_ == '-') { Advance(); if (c0_ == '>') { // Treat the rest of the line as a comment. SkipSingleLineComment(); // Continue skipping white space after the comment. continue; } PushBack('-'); // undo Advance() } PushBack('-'); // undo Advance() } return; }}Token::Value Scanner::SkipSingleLineComment() { Advance(); // The line terminator at the end of the line is not considered // to be part of the single-line comment; it is recognized // separately by the lexical grammar and becomes part of the // stream of input elements for the syntactic grammar (see // ECMA-262, section 7.4, page 12). while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { Advance(); } return Token::COMMENT;}Token::Value Scanner::SkipMultiLineComment() { ASSERT(c0_ == '*'); Advance(); while (c0_ >= 0) { char ch = c0_; Advance(); // If we have reached the end of the multi-line comment, we // consume the '/' and insert a whitespace. This way all // multi-line comments are treated as whitespace - even the ones // containing line terminators. This contradicts ECMA-262, section // 7.4, page 12, that says that multi-line comments containing // line terminators should be treated as a line terminator, but it // matches the behaviour of SpiderMonkey and KJS. if (ch == '*' && c0_ == '/') { c0_ = ' '; return Token::COMMENT; } } // Unterminated multi-line comment. return Token::ILLEGAL;}Token::Value Scanner::ScanHtmlComment() { // Check for <!-- comments. ASSERT(c0_ == '!'); Advance(); if (c0_ == '-') { Advance(); if (c0_ == '-') return SkipSingleLineComment(); PushBack('-'); // undo Advance() } PushBack('!'); // undo Advance() ASSERT(c0_ == '!'); return Token::LT;}void Scanner::Scan() { Token::Value token; bool has_line_terminator = false; do { SkipWhiteSpace(has_line_terminator); // Remember the line terminator in previous loop has_line_terminator = has_line_terminator_before_next(); // Remember the position of the next token next_.location.beg_pos = source_pos(); token = ScanToken(); } while (token == Token::COMMENT); next_.location.end_pos = source_pos(); next_.token = token;}void Scanner::SeekForward(int pos) { source_.SeekForward(pos - 1); Advance(); Scan();}uc32 Scanner::ScanHexEscape(uc32 c, int length) { ASSERT(length <= 4); // prevent overflow uc32 digits[4]; uc32 x = 0; for (int i = 0; i < length; i++) { digits[i] = c0_; int d = HexValue(c0_); if (d < 0) { // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes // should be illegal, but other JS VMs just return the // non-escaped version of the original character. // Push back digits read, except the last one (in c0_). for (int j = i-1; j >= 0; j--) { PushBack(digits[j]); } return c; } x = x * 16 + d; Advance(); } return x;}// Octal escapes of the forms '\0xx' and '\xxx' are not a part of// ECMA-262. Other JS VMs support them.uc32 Scanner::ScanOctalEscape(uc32 c, int length) { uc32 x = c - '0'; for (int i = 0; i < length; i++) { int d = c0_ - '0'; if (d < 0 || d > 7) break; int nx = x * 8 + d; if (nx >= 256) break; x = nx; Advance(); } return x;}void Scanner::ScanEscape() { uc32 c = c0_; Advance(); // Skip escaped newlines. if (kIsLineTerminator.get(c)) { // Allow CR+LF newlines in multiline string literals. if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); // Allow LF+CR newlines in multiline string literals. if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); return; } switch (c) { case '\'': // fall through
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -