📄 compilers-scanners.st
字号:
scanToken
"Scan the next token and compute its token type."
| nextState tok typeAction stateStack saveChar saveState |
stateStack := Stack new.
self atEnd
ifTrue: [self signalEndOfInput]
ifFalse:
[stateStack push: self startState.
[(nextState := stateStack top transitionFor: self nextChar ifNone: [nil]) isNil]
whileFalse:
[stateStack push: nextState.
self getNextChar].
"save the current position for error notification"
self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
stateStack top isFSAFinalState
ifFalse:
[saveChar := self nextChar.
saveState := stateStack top.
"backup to the previous final state or to the start state"
[stateStack size = 1 or: [stateStack top isFSAFinalState]]
whileFalse:
[stateStack pop.
self putBackChar].
stateStack size = 1 ifTrue:
["backed up to the start state so signal an error"
saveState transitionFor: saveChar]].
"answer the newly scanned token"
tok := self buffer contents.
typeAction := stateStack top tokenTypeAndActionFor: tok.
self tokenType: typeAction type.
self token: tok.
self buffer reset.
typeAction action notNil ifTrue: [self perform: typeAction action]]! !
FSABasedLookaheadScanner subclass: #FSABasedScannerWithTwoTokenLookahead
instanceVariableNames: 'stateStack saveState saveChar '
classVariableNames: ''
poolDictionaries: ''
category: 'Compilers-Scanners'!
FSABasedScannerWithTwoTokenLookahead comment:
'=================================================
Copyright (c) 1992 by Justin O. Graver.
All rights reserved (with exceptions).
For complete information evaluate "Object tgenCopyright."
=================================================
This class provides a scanner with simple two-token lookahead.
Instance Variables:
stateStack <Stack> - primary state stack for scanning tokens.
saveState <Integer> - pointer into input source for error notification.
saveChar <Character> - pointer into input source for error notification.'!
!FSABasedScannerWithTwoTokenLookahead methodsFor: 'scanner generation'!
defaultOptimizedScannerClass
^OptimizedScannerWithTwoTokenLookahead! !
!FSABasedScannerWithTwoTokenLookahead methodsFor: 'scanning'!
checkForTokenIn: newStateStack buffer: charBuffer
"Scan the input using the arguments. Answer true if a legal token (or no illegal token) was
found and false otherwise."
| nextState |
self atEnd
ifFalse:
[newStateStack push: self startState.
"look for longest possible token"
[(nextState := newStateStack top transitionFor: self nextChar ifNone: [nil]) isNil]
whileFalse:
[newStateStack push: nextState.
"getNextChar for local vars"
charBuffer nextPut: self nextChar.
self nextChar: self source next].
"save the current position for error notification"
self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
newStateStack top isFSAFinalState
ifFalse:
[self saveChar: self nextChar.
self saveState: newStateStack top.
"backup to the previous final state or to the start state"
[newStateStack size = 1 or: [newStateStack top isFSAFinalState]]
whileFalse:
[newStateStack pop.
"putBackChar for local vars"
charBuffer backspace.
self backspaceSource].
newStateStack size = 1 ifTrue:
["backed up to the start state"
self stateStack == newStateStack
ifTrue:
["this is the first token, so signal an error (abort and return)"
self saveState transitionFor: self saveChar]
ifFalse:
["we may be able to backup in the previous token"
^false]]]].
^true!
scanToken
"Scan the next token and compute its token type."
| tok typeAction newStateStack charBuffer |
newStateStack := Stack new.
charBuffer := RetractableWriteStream on: (String new: 32).
(self checkForTokenIn: newStateStack buffer: charBuffer)
ifTrue:
["either a legal token or the end on input was found"
self stateStack isEmpty ifTrue: [self atEnd
ifTrue: [^self signalEndOfInput]
ifFalse: [self error: 'no more vaild tokens']].
tok := self buffer contents.
typeAction := self stateStack top tokenTypeAndActionFor: tok.
self tokenType: typeAction type.
self token: tok.
self buffer: charBuffer.
self stateStack: newStateStack.
typeAction action notNil ifTrue: [self perform: typeAction action]]
ifFalse:
["an illegal token was found, try to look for earlier final state in current token buffers"
charBuffer size timesRepeat:
["put back illegal token chars"
self backspaceSource].
"backup in current token to next smallest legal token"
[self stateStack size = 1
or:
[self stateStack pop.
self putBackChar.
self stateStack top isFSAFinalState]] whileFalse.
self stateStack size = 1
ifTrue:
["no smaller legal token so signal error"
self saveState transitionFor: self saveChar]
ifFalse:
["try again"
self scanToken]]! !
!FSABasedScannerWithTwoTokenLookahead methodsFor: 'state accessing'!
saveChar
^saveChar!
saveChar: argument
saveChar := argument!
saveState
^saveState!
saveState: argument
saveState := argument!
stateStack
^stateStack!
stateStack: argument
stateStack := argument! !
!FSABasedScannerWithTwoTokenLookahead methodsFor: 'initialization'!
reset
"Reset the initial state of the scanner before scanning a new source."
super reset.
self stateStack: Stack new!
scanSource: aString
"Convert the input string to a read stream and scan the first token."
self reset.
self source: (RetractableReadStream on: aString).
self nextChar: self source next.
self checkForTokenIn: self stateStack buffer: self buffer.
self scanToken! !
AbstractScanner subclass: #HandCodedScanner
instanceVariableNames: 'charTypeTable '
classVariableNames: ''
poolDictionaries: ''
category: 'Compilers-Scanners'!
HandCodedScanner comment:
'=================================================
Copyright (c) 1992 by Justin O. Graver.
All rights reserved (with exceptions).
For complete information evaluate "Object tgenCopyright."
=================================================
I am an abstract class of scanner that scans a source string and breaks it up into tokens using a character type table and hand-coded scanner methods. Specific type tables are stored in class instance variables of my concrete subclasses.
Instance Variables:
charTypeTable <Array of: Symbol> - a local reference to the type table for this class of scanner; the ascii value of each character is mapped to a symbol token type.
'!
!HandCodedScanner methodsFor: 'state accessing'!
charTypeTable
^charTypeTable!
charTypeTable: argument
charTypeTable := argument! !
!HandCodedScanner methodsFor: 'initialization'!
init
super init.
self charTypeTable: self myTypeTable! !
!HandCodedScanner methodsFor: 'accessing'!
endOfInputToken
"Answer a token representing the end of the input."
^nil!
endOfInputTokenType
"Answer the token type representing the end of the input."
^#doIt!
myTypeTable
^self class charTypeTable! !
!HandCodedScanner methodsFor: 'testing'!
atStartOfComplexToken
"Answer true if the first character of the tokenType is an $x and false otherwise."
^(self tokenType at: 1)
= $x! !
!HandCodedScanner methodsFor: 'scanning'!
scanToken
"Scan the next token and compute its token type. This may be
overridden in subclasses for efficiency and customization."
[self atEnd ifTrue: [^self signalEndOfInput].
self tokenType: (self charTypeTable at: self nextChar asInteger).
self tokenType == #xDelimiter]
whileTrue:
["Skip delimiters fast, there almost always is one."
self getNextChar].
self atStartOfComplexToken
ifTrue:
["perform to compute token & type"
self perform: tokenType]
ifFalse:
["else just the character"
self token: self nextChar.
self getNextChar]! !
"-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "!
HandCodedScanner class
instanceVariableNames: 'charTypeTable '!
!HandCodedScanner class methodsFor: 'class initialization'!
initialize
"Concrete subclasses must provide a character type table."
"HandCodedScanner initialize"
| newTable |
newTable := Array new: 256 withAll: #xDefault. "default"
self charTypeTable: newTable! !
!HandCodedScanner class methodsFor: 'state accessing'!
charTypeTable
^charTypeTable!
charTypeTable: argument
charTypeTable := argument! !
FSABasedScanner subclass: #OptimizedScanner
instanceVariableNames: 'finalStateTable '
classVariableNames: 'NoTransitionSignal '
poolDictionaries: ''
category: 'Compilers-Scanners'!
OptimizedScanner comment:
'=================================================
Copyright (c) 1992 by Justin O. Graver.
All rights reserved (with exceptions).
For complete information evaluate "Object tgenCopyright."
=================================================
I am an abstract class of scanner that scans a source string and breaks it up into tokens
using a table created by converting FSA to integer.
instance Variables:
finalStateTable - a table that maps integer ( represented as final state ) to
literal tokens and token classes.
'!
!OptimizedScanner methodsFor: 'converting'!
assignNextIDAfter: id toSuccessorOf: state
"I try to assing a number to fsa in order to create a fsa table."
| nextID nextState |
nextID := id + 1.
state edgeLabelMap
associationsDo:
[:assoc |
nextState := assoc value.
nextState stateID isNil
ifTrue:
[nextState stateID: nextID.
nextState isFSAFinalState ifTrue: [(finalStateTable includes: nextState)
ifFalse: [finalStateTable at: nextID put: nextState]].
nextID := self assignNextIDAfter: nextID toSuccessorOf: nextState]].
^nextID!
changeFSAToObjectTable: fsaState
| sizePlusOne objectTable |
fsaState stateID notNil ifTrue: [fsaState nilOutStateIDs].
fsaState stateID: self startState.
self finalStateTable: Dictionary new.
sizePlusOne := self assignNextIDAfter: self startState toSuccessorOf: fsaState.
objectTable := Array new: sizePlusOne - 1.
self convert: fsaState to: objectTable.
self modifyFSAFinalStates: sizePlusOne - 1. "convert Dictionary to Array for speed"
^objectTable!
convert: state to: objectTable
"I try to create a table that maps state ( represented by integer ) to state"
| arr nextState |
arr := Array new: 127.
objectTable at: state stateID put: arr.
state edgeLabelMap
associationsDo:
[:assoc |
nextState := assoc value.
(objectTable at: nextState stateID) isNil ifTrue: [self convert: nextState to: objectTable].
arr at: assoc key asInteger put: nextState stateID].
^objectTable!
convertToTable: fsaScanner
self fsa: (self changeFSAToObjectTable: fsaScanner fsa)!
modifyFSAFinalStates: index
"Convert Dictionary and its values to Array of Array"
| tokenSet table |
table := Array new: index.
finalStateTable do:
[:st |
tokenSet := Array new: 2.
tokenSet at: 1 put: st literalTokens asOrderedCollection asArray; at: 2 put: st tokenClasses asArray.
table at: st stateID put: tokenSet].
self finalStateTable: table! !
!OptimizedScanner methodsFor: 'private'!
at: state transitionFor: char
| value |
(value := (fsa at: state)
at: char asInteger) isNil ifTrue: [(finalStateTable at: state) isNil ifTrue: [self raiseNoTransitionExceptionErrorString: (char == self endOfInputToken
ifTrue: [self endOfInputErrorString]
ifFalse: [self standardErrorString , '''' , char printString , ''''])]].
^value! !
!OptimizedScanner methodsFor: 'initialization'!
init
super init.
self finalStateTable: self myFinalStateTable! !
!OptimizedScanner methodsFor: 'state accessing'!
finalStateTable
^finalStateTable!
finalStateTable: arg
finalStateTable := arg! !
!OptimizedScanner methodsFor: 'accessing'!
myFinalStateTable
^self class finalStateTable!
startState
^1! !
!OptimizedScanner methodsFor: 'exception handling'!
endOfInputErrorString
^'end of input encountered'!
raiseNoTransitionExceptionErrorString: aString
self class noTransitionSignal raiseErrorString: aString!
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -