compilers-scanners.st

来自「編譯器的語法產生器」· ST 代码 · 共 1,256 行 · 第 1/3 页

ST
1,256
字号

standardErrorString
	"Answer the message text used to report an illegal character.
	The literal ends with two trailing spaces."

	^'illegal character encountered:  '! !

!OptimizedScanner methodsFor: 'testing'!

atEnd
	"Answer true when the lookahead character is the end-of-input token,
	that is, when the end-of-file character has been reached."

	| eofMarker |
	eofMarker := self endOfInputToken.
	^nextChar == eofMarker! !

!OptimizedScanner methodsFor: 'reconstructing'!

reconstructFinalStateTableOn: aStream 
	"Write onto aStream the source text that rebuilds the final state table:
	an assignment of the reconstructed table to the variable named table,
	followed by a constructFinalStateTable: send with that variable."

	aStream nextPutAll: 'table := '.
	finalStateTable reconstructOn: aStream.
	aStream period.
	aStream crtab.
	aStream nextPutAll: 'self constructFinalStateTable: table'!

reconstructFSAOn: aStream 
	"Write onto aStream the source text of an fsa: send whose argument is the
	reconstructed fsa, terminated by a period and a crtab."

	aStream nextPutAll: 'self fsa: '.
	fsa reconstructOn: aStream.
	aStream period.
	aStream crtab!

reconstructOn: aStream 
	"Emit onto aStream the code that recreates the fsa first and the final state table second."

	self
		reconstructFSAOn: aStream;
		reconstructFinalStateTableOn: aStream! !

!OptimizedScanner methodsFor: 'scanner generation'!

classInitializationMethodTextForClassNamed: name spec: tokenSpec
	"Answer the source text of an initialize method. The text consists of the
	selector, a comment naming the class followed by initialize, a second comment
	holding tokenSpec, the declaration of the table temporary, and the code
	written by reconstructOn:."

	| out |
	out := self newStreamForMethodRendering.
	"selector line and the comment that names the class"
	out nextPutAll: 'initialize'.
	out crtab.
	out nextPut: $".
	out nextPutAll: name.
	out nextPutAll: ' initialize"'.
	out crtab.
	"the token specification is embedded as a second comment"
	out nextPut: $".
	tokenSpec do: [:each |
		out nextPut: each.
		"a double-quote character inside the comment has to be written twice"
		each = $" ifTrue: [out nextPut: $"]].
	out nextPut: $".
	out cr.
	out crtab.
	out nextPutAll: '| table |'.
	out crtab.
	self reconstructOn: out.
	^out contents! !

!OptimizedScanner methodsFor: 'scanning'!

at: state tokenTypeAndActionFor: aString 
	"Answer a TokenTypeActionHolder for the text aString recognized in the given final state.
	Overlapping token classes are not handled by the current implementation, so a final
	state stands for either a literal or a single token class. When aString is not among
	the literals of the state, it must therefore belong to the token class."

	| entry classification |
	entry := finalStateTable at: state.
	"slot 1 of the entry holds the literals: a literal is its own token type and has no action"
	((entry at: 1) includes: aString)
		ifTrue: [^TokenTypeActionHolder type: aString action: nil].
	"slot 2 holds the token classifications, of which only the first is used"
	classification := (entry at: 2) first.
	^TokenTypeActionHolder type: classification tokenType action: classification action!

getNextChar
	"Append the current lookahead character to the buffer, then read the next
	lookahead character from the source.
	Source will answer an eof char when no more input is available.
	Subclasses may override this to avoid unnecessary buffering."

	buffer nextPut: nextChar.
	nextChar := source next!

signalEndOfInput
	"Set scanner to the end-of-input state: both the token and its type
	become the end-of-input token."

	| eofToken |
	eofToken := self endOfInputToken.
	token := eofToken.
	tokenType := eofToken! !
"-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "!

"Declare the class-side instance variables finalStateTable and tokenTable of OptimizedScanner."
OptimizedScanner class
	instanceVariableNames: 'finalStateTable tokenTable '!


!OptimizedScanner class methodsFor: 'state accessing'!

finalStateTable
	"Answer the class-side final state table."

	^finalStateTable!

finalStateTable: arg 
	"Store arg as the class-side final state table."

	finalStateTable := arg!

noTransitionSignal
	"Answer the object held in the shared variable NoTransitionSignal."

	^NoTransitionSignal!

noTransitionSignal: arg 
	"Store arg in the shared variable NoTransitionSignal."

	NoTransitionSignal := arg!

tokenTable
	"Answer the class-side token table."

	^tokenTable!

tokenTable: arg 
	"Store arg as the class-side token table."

	tokenTable := arg! !

!OptimizedScanner class methodsFor: 'class initialization'!

initialize
	"OptimizedScanner initialize"
	"Create a Signal named by this class and #noTransitionSymbol, and install it
	through noTransitionSignal:."

	| signal |
	signal := Signal new nameClass: self message: #noTransitionSymbol.
	self noTransitionSignal: signal! !

!OptimizedScanner class methodsFor: 'reconstructing'!

constructFinalStateTable: arg 
	"Build the class-side final state table from arg. A nil element of arg stays nil.
	Any other element contributes a two-element Array: its first slot unchanged, and
	its second slot converted by constructTokenClassification:."

	| entry |
	"a new Array is filled with nil, so only the non-nil entries need to be stored"
	finalStateTable := Array new: arg size.
	1 to: arg size do: [:index |
		entry := arg at: index.
		entry isNil ifFalse: [
			finalStateTable
				at: index
				put: (Array
						with: (entry at: 1)
						with: (self constructTokenClassification: (entry at: 2)))]]!

constructTokenClassification: aCollection 
	"Answer an Array holding one TokenClassification per element of aCollection, in
	the same order. Each element supplies the token type at index 1 and the action
	at index 2. An empty collection yields an empty Array.
	A single loop covers every size, so the one-element case needs no special
	branch and no identity comparison of integers."

	| result spec |
	result := Array new: aCollection size.
	1 to: aCollection size do: 
		[:index | 
		spec := aCollection at: index.
		result at: index put: (TokenClassification
				tokenType: (spec at: 1)
				action: (spec at: 2))].
	^result! !

!OptimizedScanner class methodsFor: 'instance creation'!

buildFrom: fsaScanner
	"Answer the result of sending convertToTable: with fsaScanner to a new instance of the receiver."

	| scanner |
	scanner := self new.
	^scanner convertToTable: fsaScanner! !

"Define OptimizedLookaheadScanner as a subclass of OptimizedScanner with the single instance variable savePosition, then attach its class comment."
OptimizedScanner subclass: #OptimizedLookaheadScanner
	instanceVariableNames: 'savePosition '
	classVariableNames: ''
	poolDictionaries: ''
	category: 'Compilers-Scanners'!
OptimizedLookaheadScanner comment:
'=================================================
    Copyright (c) 1992 by Justin O. Graver.
    All rights reserved (with exceptions).
    For complete information evaluate "Object tgenCopyright."
=================================================

This is an abstract class for table-based optimized scanners with lookahead.

Instance Variables:
	savePosition <Integer> - pointer into input source for error notification.'!


!OptimizedLookaheadScanner methodsFor: 'accessing'!

errorPosition
	"Answer the source position of the last acceptable character, never less than 1."

	| position |
	position := self savePosition.
	^position max: 1! !

!OptimizedLookaheadScanner methodsFor: 'initialization'!

reset
	"Reset the initial state of the scanner before scanning a new source.
	The inherited reset runs first, then the saved error position is cleared to 0."

	super reset.
	self savePosition: 0! !

!OptimizedLookaheadScanner methodsFor: 'state accessing'!

savePosition
	"Answer the source position saved for error notification."

	^savePosition!

savePosition: argument 
	"Store argument as the source position saved for error notification."

	savePosition := argument! !

!OptimizedLookaheadScanner methodsFor: 'testing'!

isFSAFinalState: aState
	"Answer true if aState is a final state, false otherwise.
	A state is final exactly when its entry in the final state table is not nil."

	| entry |
	entry := self finalStateTable at: aState.
	^entry notNil! !

"Define OptimizedScannerWithOneTokenLookahead as a subclass of OptimizedLookaheadScanner that declares no instance variables of its own, then attach its class comment."
OptimizedLookaheadScanner subclass: #OptimizedScannerWithOneTokenLookahead
	instanceVariableNames: ''
	classVariableNames: ''
	poolDictionaries: ''
	category: 'Compilers-Scanners'!
OptimizedScannerWithOneTokenLookahead comment:
'=================================================
    Copyright (c) 1992 by Justin O. Graver.
    All rights reserved (with exceptions).
    For complete information evaluate "Object tgenCopyright."
=================================================

This class provides a table-based optimized scanner with simple one-token lookahead.  '!


!OptimizedScannerWithOneTokenLookahead methodsFor: 'scanning'!

scanToken
	"Scan the next token and compute its token type.
	At the end of input the scanner is put in the end-of-input state and nothing else happens.
	Otherwise the FSA is followed for as long as a transition exists for the lookahead
	character. If that stops in a non-final state, the scanner backs up to the most recent
	final state, or to the start state, in which case at:transitionFor: is sent (defined
	outside this view; presumably it reports the error). The buffered text then becomes the
	token, its type is looked up with at:tokenTypeAndActionFor:, the buffer is reset, and
	the action of the token, if any, is performed."

	| nextState tok typeAction stateStack saveChar saveState |
	stateStack := Stack new.
	self atEnd
		ifTrue: [self signalEndOfInput]
		ifFalse:
			[stateStack push: self startState.
			"follow transitions as far as possible, consuming one character per step"
			[(nextState := (fsa at: stateStack top) at: self nextChar asInteger) isNil]
				whileFalse:
					[stateStack push: nextState.
					self getNextChar].
			"save the current position for error notification"
			self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
			(self isFSAFinalState: stateStack top)
				ifFalse:
					["remember the character and the state where scanning stopped, for error notification"
					saveChar := self nextChar.
					saveState := stateStack top.
					"backup to the previous final state or to the start state"
					[stateStack size = 1 or: [self isFSAFinalState: stateStack top]]
						whileFalse:
							[stateStack pop.
							self putBackChar].
					stateStack size = 1
						ifTrue:
						["backed up to the start state so signal an error"
						self at: saveState transitionFor: saveChar]].
		"NOTE: despite the indentation, the statements below still belong to the ifFalse: block opened above"
		"answer the newly scanned token"
		tok := self buffer contents.
		typeAction := self at: stateStack top tokenTypeAndActionFor: tok.
		self tokenType: typeAction type.
		self token: tok.
		self buffer reset.
		typeAction action notNil ifTrue: [self perform: typeAction action]]! !

OptimizedLookaheadScanner subclass: #OptimizedScannerWithTwoTokenLookahead
	instanceVariableNames: 'stateStack saveState saveChar '
	classVariableNames: ''
	poolDictionaries: ''
	category: 'Compilers-Scanners'!
OptimizedScannerWithTwoTokenLookahead comment:
'=================================================
    Copyright (c) 1992 by Justin O. Graver.
    All rights reserved (with exceptions).
    For complete information evaluate "Object tgenCopyright."
=================================================

This class provides a table-based optimized scanner with simple two-token lookahead.

Instance Variables:
	stateStack	<Stack> - primary state stack for scanning tokens.
	saveState	<Integer> - FSA state in which scanning stopped without reaching a final state; kept for error notification.
	saveChar	<Character> - lookahead character for which that state had no transition; kept for error notification.'!


!OptimizedScannerWithTwoTokenLookahead methodsFor: 'scanning'!

checkForTokenIn: newStateStack buffer: charBuffer 
	"Scan the input using the arguments. Answer true if a legal token (or no illegal token) was
	found and false otherwise.
	The visited FSA states are pushed on newStateStack and the consumed characters are written
	to charBuffer. Nothing is scanned when the scanner is already at the end of input, and true
	is answered. When scanning has to back up all the way to the start state, at:transitionFor:
	is sent if newStateStack is the primary state stack (defined outside this view; presumably
	it reports the error); otherwise false is answered."

	| nextState |
	self atEnd
		ifFalse: 
			[newStateStack push: self startState.
			"look for longest possible token"
			[(nextState := (fsa at: newStateStack top) at: self nextChar asInteger) isNil]
				whileFalse: 
					[newStateStack push: nextState.
					"getNextChar for local vars"
					charBuffer nextPut: self nextChar.
					self nextChar: self source next].
			"save the current position for error notification"
			self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
			(self isFSAFinalState: newStateStack top)
				ifFalse: 
					["remember the character and the state where scanning stopped, for error notification"
					saveChar := self nextChar.
					saveState := newStateStack top.
					"backup to the previous final state or to the start state"
					[newStateStack size = 1 or: [self isFSAFinalState: newStateStack top]]
						whileFalse: 
							[newStateStack pop.
							"putBackChar for local vars"
							charBuffer backspace.
							self backspaceSource].
					newStateStack size = 1 ifTrue: 
						["backed up to the start state"
						self stateStack == newStateStack
							ifTrue: 
								["this is the first token, so signal an error (abort and return)"
								self at: saveState transitionFor: saveChar]
							ifFalse: 
								["we may be able to backup in the previous token"
								^false]]]].
	^true!

scanToken
	"Scan the next token and compute its token type.
	The token delivered is the one already held in the buffer and the primary state stack.
	A fresh state stack and character buffer are first used to scan the token after it.
	If that lookahead succeeds (or the end of input is reached), the current token is
	delivered and the lookahead stack and buffer become the current ones. If the lookahead
	finds an illegal token, the current token is shortened to the next smaller legal token
	and the whole scan is retried; when no smaller legal token exists, at:transitionFor:
	is sent with the saved state and character."

	| tok typeAction newStateStack charBuffer |
	newStateStack := Stack new.
	charBuffer := RetractableWriteStream on: (String new: 32).
	(self checkForTokenIn: newStateStack buffer: charBuffer)
		ifTrue: 
			["either a legal token or the end of input was found"
			self stateStack isEmpty ifTrue: [self atEnd
					ifTrue: [^self signalEndOfInput]
					ifFalse: 
						["with an empty stack there is no current token to deliver, so do not fall through"
						^self error: 'no more valid tokens']].
			tok := self buffer contents.
			typeAction := self at: self stateStack top tokenTypeAndActionFor: tok.
			self tokenType: typeAction type.
			self token: tok.
			"the lookahead buffer and stack become the current ones"
			self buffer: charBuffer.
			self stateStack: newStateStack.
			typeAction action notNil ifTrue: [self perform: typeAction action]]
		ifFalse: 
			["an illegal token was found, try to look for earlier final state in current token buffers"
			charBuffer size timesRepeat: 
				["put back illegal token chars"
				self backspaceSource].
			"backup in current token to next smallest legal token"
			[self stateStack size = 1
				or: 
					[self stateStack pop.
					self putBackChar.
					self isFSAFinalState: self stateStack top]] whileFalse.
			self stateStack size = 1
				ifTrue: 
					["no smaller legal token so signal error"
					self at: saveState transitionFor: saveChar]
				ifFalse: 
					["try again"
					self scanToken]]! !

!OptimizedScannerWithTwoTokenLookahead methodsFor: 'initialization'!

reset
	"Reset the initial state of the scanner before scanning a new source.
	The inherited reset runs first, then the primary state stack is replaced by a new empty Stack."

	super reset.
	self stateStack: Stack new!

scanSource: aString 
	"Convert the input string to a read stream and scan the first token.
	After the reset, the source is wrapped in a RetractableReadStream and the first
	lookahead character is read. checkForTokenIn:buffer: is then run on the primary
	state stack and buffer, so that the following scanToken has a current token to deliver."

	self reset.
	self source: (RetractableReadStream on: aString).
	self nextChar: self source next.
	self checkForTokenIn: self stateStack buffer: self buffer.
	self scanToken! !

!OptimizedScannerWithTwoTokenLookahead methodsFor: 'state accessing'!

saveChar
	"Answer the character saved for error notification."

	^saveChar!

saveChar: argument 
	"Store argument as the character saved for error notification."

	saveChar := argument!

saveState
	"Answer the state saved for error notification."

	^saveState!

saveState: argument 
	"Store argument as the state saved for error notification."

	saveState := argument!

stateStack
	"Answer the primary state stack."

	^stateStack!

stateStack: argument 
	"Store argument as the primary state stack."

	stateStack := argument! !
"Evaluate the class-side initialize methods of the scanner classes at file-in time."
FSABasedScanner initialize!

HandCodedScanner initialize!

OptimizedScanner initialize!


⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?