📄 prslex.asm
字号:
TITLE prslex.asm - Parser's Lexical Analyzer (token fetcher)
;=============================================================================
; prslex.asm - Parser's Utility Functions
;
; Copyright <C> 1985, Microsoft Corporation
;
; Purpose:
; Contains QBI Lexical Analyzer Functions
;
;
;=============================================================================
include version.inc
PRSLEX_ASM = ON
includeOnce architec
includeOnce context
includeOnce exint ;needed for CoR4R8
includeOnce heap
includeOnce names
includeOnce parser
includeOnce prstab
includeOnce prsirw
includeOnce psint
includeOnce qbimsgs
includeOnce rtinterp
includeOnce txtmgr
includeOnce variable ;for FVI_FNNAME [3]
assumes DS,DGROUP
assumes SS,DGROUP
assumes ES,NOTHING
sBegin DATA
extrn $i8_inpbas:byte ; inpbas now supported in AltMath
;List of bytes which, when they follow digits, mean its not an I2
tbFloat label byte
db '.' - '0'
db 'e' - '0'
db 'E' - '0'
db 'd' - '0'
db 'D' - '0'
db '&' - '0'
db '!' - '0'
db '#' - '0'
db '%' - '0' ;even though % means integer, its not fast-path
;through the code (would slow down typical cases
;to check for % in TryI2)
CB_tbFloat EQU $-tbFloat
sEnd DATA
sBegin CP
assumes CS,CP
QUOTE = 022H ;quote char (")
; It is assumed that all classes of characters < TC_PERIOD are valid
; in identifiers.
OrdConstStart 0
OrdConst TC_UCASE ;* Upper Case Alpha
OrdConst TC_LCASE ;* Lower Case Alpha
OrdConst TC_NUM ;* numeric (or .)
OrdConst TC_PERIOD ;* "." (record separator / number / part of id)
OrdConst TC_NEWLINE ;* end-of-line
OrdConst TC_WHITE ;* white space (tab or space)
OrdConst TC_SPECIAL ;* special character
OrdConst TC_PRINT ;* '?' (short hand for PRINT)
OrdConst TC_RW ;* special character reserved word
typChar LABEL BYTE
DB TC_NEWLINE ; NUL
DB TC_SPECIAL ; SOH
DB TC_SPECIAL ; STX
DB TC_SPECIAL ; ETX
DB TC_SPECIAL ; EOT
DB TC_SPECIAL ; ENQ
DB TC_SPECIAL ; ACK
DB TC_SPECIAL ; BEL
DB TC_SPECIAL ; BS
DB TC_WHITE ; HT
;Tabs are expanded to spaces in GetLineBd.
;Don't be tempted to do it at lex time because
;if ReParse, source buffer is copied to pcode.
DB TC_NEWLINE ; LF
DB TC_SPECIAL ; VT
DB TC_SPECIAL ; FF
DB TC_NEWLINE ; CR
DB TC_SPECIAL ; SO
DB TC_SPECIAL ; SI
DB TC_SPECIAL ; DLE
DB TC_SPECIAL ; DC1
DB TC_SPECIAL ; DC2
DB TC_SPECIAL ; DC3
DB TC_SPECIAL ; DC4
DB TC_SPECIAL ; NAK
DB TC_SPECIAL ; SYN
DB TC_SPECIAL ; ETB
DB TC_SPECIAL ; CAN
DB TC_SPECIAL ; EM
DB TC_SPECIAL ; SUB
DB TC_SPECIAL ; ESC
DB TC_SPECIAL ; FS
DB TC_SPECIAL ; GS
DB TC_SPECIAL ; RS
DB TC_SPECIAL ; US
DB TC_WHITE ; spc
DB TC_RW + IRW_EtSingle ; !
DB TC_RW + IRW_DQuote ; "
DB TC_RW + IRW_Lbs ; #
DB TC_RW + IRW_EtString ; $
DB TC_RW + IRW_EtInteger ; %
DB TC_RW + IRW_EtLong ; &
DB TC_RW + IRW_SQuote ; '
DB TC_RW + IRW_LParen ; (
DB TC_RW + IRW_RParen ; )
DB TC_RW + IRW_Mult ; *
DB TC_RW + IRW_Add ; +
DB TC_RW + IRW_Comma ; ,
DB TC_RW + IRW_Minus ; -
DB TC_PERIOD ; .
DB TC_RW + IRW_Div ; /
DB TC_NUM ; 0
DB TC_NUM ; 1
DB TC_NUM ; 2
DB TC_NUM ; 3
DB TC_NUM ; 4
DB TC_NUM ; 5
DB TC_NUM ; 6
DB TC_NUM ; 7
DB TC_NUM ; 8
DB TC_NUM ; 9
DB TC_RW + IRW_Colon ; :
DB TC_RW + IRW_SColon ;
DB TC_RW + IRW_LT ; <
DB TC_RW + IRW_EQ ; =
DB TC_RW + IRW_GT ; >
DB TC_PRINT ; ?
DB TC_SPECIAL ; @
DB TC_UCASE ; A
DB TC_UCASE ; B
DB TC_UCASE ; C
DB TC_UCASE ; D
DB TC_UCASE ; E
DB TC_UCASE ; F
DB TC_UCASE ; G
DB TC_UCASE ; H
DB TC_UCASE ; I
DB TC_UCASE ; J
DB TC_UCASE ; K
DB TC_UCASE ; L
DB TC_UCASE ; M
DB TC_UCASE ; N
DB TC_UCASE ; O
DB TC_UCASE ; P
DB TC_UCASE ; Q
DB TC_UCASE ; R
DB TC_UCASE ; S
DB TC_UCASE ; T
DB TC_UCASE ; U
DB TC_UCASE ; V
DB TC_UCASE ; W
DB TC_UCASE ; X
DB TC_UCASE ; Y
DB TC_UCASE ; Z
DB TC_RW + IRW_LParen ; map '[' to '(' for BASICA compatibility
DB TC_RW + IRW_Idiv ; \
DB TC_RW + IRW_RParen ; map ']' to ')' for BASICA compatibility
DB TC_RW + IRW_Pwr ; ^
DB TC_SPECIAL; _ (underscore)
DB TC_SPECIAL ; `
DB TC_LCASE ; a
DB TC_LCASE ; b
DB TC_LCASE ; c
DB TC_LCASE ; d
DB TC_LCASE ; e
DB TC_LCASE ; f
DB TC_LCASE ; g
DB TC_LCASE ; h
DB TC_LCASE ; i
DB TC_LCASE ; j
DB TC_LCASE ; k
DB TC_LCASE ; l
DB TC_LCASE ; m
DB TC_LCASE ; n
DB TC_LCASE ; o
DB TC_LCASE ; p
DB TC_LCASE ; q
DB TC_LCASE ; r
DB TC_LCASE ; s
DB TC_LCASE
DB TC_LCASE ; u
DB TC_LCASE ; v
DB TC_LCASE ; w
DB TC_LCASE ; x
DB TC_LCASE ; y
DB TC_LCASE ; z
DB TC_SPECIAL ; {
DB TC_SPECIAL ; |
DB TC_SPECIAL ; }
DB TC_SPECIAL ; ~
DB TC_SPECIAL ; DEL
subttl FindRw
;================ External Procedures Referenced by this module ========
;***************************************************************************
; NEAR BOOL FindRw(pToken, pSym, cbSym, fStr)
; Purpose:
; Search the reserved word table for a specified symbol.
;
; Entry:
; 'pToken' points to descriptor for current token being fetched
; 'pSym' points to the 1st byte of the symbol to search for.
; 'cbSym' = number of bytes in symbol (always assumed to be > 1)
; 'fStr' is true if the symbol was terminated by "$".
; [ES] = [DS]
; NOTE: This assumes reserved word table is ordered ALPHABETICALLY
;
; Exit:
; If the reserved word was not found,
; returns with carry clear
; else
; returns with carry set
; The fields of the token structure are filled in with the description
; of the reserved word.
; [11]In EB FindRw may also determine that the string passed to it is a
; command equivalent name in which case the token structure is filled
; in describing it.
;
;*******************************************************************************
; Register allocation within procedure:
; si is always pointing to next byte in reserved word table
; di = temporary copy of pbSym
; cx = temporary copy of cbSym
cProc FindRw <PUBLIC,NEAR>, <si,di>
ParmW pToken ;ptr to token being built
ParmW pSym ;ptr to 1st byte of name to search for
ParmW cbSym ;byte count of name to search for
ParmB fStr ;TRUE if name ends with '$'
localW nStmts ;num statements that begin with
; this reserved word
cBegin FindRw
mov si,[pSym] ;si points to 1st letter of id
lodsb ;al = 1st letter of id
dec [cbSym] ;decrement byte count for same reason
sub al,'A' ;al = (0..25) for (A..Z)
cbw
cmp al,'Z'-'A'
ja WordNotFound1 ;brif first letter can not start RW
shl ax,1 ;ax = word index into tRw
xchg si,ax ;si = index into tRw
mov si,WORD PTR cs:tRw[si] ;si->res word table for 1st letter of id
;======================================================
; NOTE: Beginning of block which has DS->CP (not DGROUP)
; If any static variables need be accessed in this
; block, access them with an ES override.
;======================================================
push cs
pop ds ;ds = Code Segment (CP)
assumes DS,CP
lodsw ;[ax] = res word index for 1st entry
mov bx,ax ;iRw = res word index for 1st entry
mov cx,[cbSym] ;initialize
jmp SHORT NextEntry
;Register usage:
; dx points to next entry in reserved word table
; bx contains the res word index for current symbol
; si points into current reserved word table entry
;
CbsInWord:
lodsw ;al = #bytes of attributes
;ah = #bytes in reserved word
jmp SHORT GotCbs
TryNextWord:
mov cx,[cbSym] ;cx = byte count for that name
TryNextWordCX:
inc bx
mov si,dx ;si points to start of next entry
NextEntry:
lodsb ;al = size of next entry
; high 4 bits = #bytes in name
; low 4 bits = #bytes of attributes
or al,al
jz WordNotFound ;brif end of this res word table
inc al
jz CbsInWord ;brif counts are in following word
dec al
mov ah,al ;ah = #bytes in name * 16
and al,0FH ;al = #bytes of attributes
shr ah,1 ;ah = #bytes in name * 8
shr ah,1 ;ah = #bytes in name * 4
shr ah,1 ;ah = #bytes in name * 2
shr ah,1 ;ah = #bytes in name
GotCbs:
mov dl,ah ;dl = #bytes in name
add dl,al ;dl = #bytes in name & attributes
sub dh,dh ;dx = #bytes in name & attributes
add dx,si ;dx->next entry
cmp ah,cl
jne TryNextWordCX ;brif name lengths not equal
mov di,[pSym]
inc di ;di->2nd byte of name to search for
rep cmpsb ;compare pSym with res word entry
jc TryNextWord ;brif pSym is alphabetically > this
;entry in res word tbl.
jne WordNotFound ;brif names are not identical
;and pSym is alphabetically < this
;entry in res word tbl.
lodsb ;al = flags byte
sub ah,ah ;ah = STRING:FALSE
test al,RWF_STR
je NotStr
mov ah,1 ;ah = STRING:TRUE
NotStr:
cmp ah,[fStr]
jne TryNextWord ;brif one name ends with $ and
; the other doesn't (no match)
;At this point, we have found a match in the reserved word table.
;
push ss
pop ds ;restore DS = DGROUP
assumes DS,DGROUP
;======================================================
; NOTE: End of block which has DS->CP
;======================================================
mov di,[pToken] ;di -> token descriptor being built
mov [di.TOK_class],CL_RESWORD
mov [di.TOK_rw_rwf],ax ;pToken->dsc.rwf = res word flags
test al,RWF_OPERATOR
mov ax,UNDEFINED
je NotOper ;brif res word is not an operator
lods BYTE PTR cs:[si] ;al = IOP_xxx index for res word
sub ah,ah ;ax = IOP_xxx index for res word
NotOper:
mov [di.TOK_rw_iOperator],ax;save it in pToken
mov [di.TOK_rw_iRw],bx ;save res word's unique index
mov [di.TOK_rw_pArgs],si ;save ptr to func/stmt args
stc ;tell caller reserved word was found
jmp SHORT FindRwExit ;exit function
WordNotFound:
push ss
pop ds ;restore DS = DGROUP
WordNotFound1:
clc ;tell caller reserved word not found
FindRwExit:
cEnd FindRw
subttl FetchToken
;*********************************************************************
; VOID PLM NEAR FetchToken(di:pToken)
;
; Purpose:
; This is the lexical analyzer for BASIC. It scans
; ASCII text and sets fields in the structure pToken
; which identify the scanned token.
; It filters out white space such as tabs, newlines, and spaces.
;
; Entry:
; di (pToken) points to the receiving token descriptor structure.
; ps.bdpSrc is a buffer descriptor which holds current line being parsed
;
; Exit:
; The structure pointed to by pToken is filled in
; id:
; pToken->class = CL_ID
; pToken->dsc.id.oNam = ONamOfPbCb offset for the identifier.
; pToken->dsc.id.oTyp = explicit type which followed
; identifier (ET_IMP for implicit, ET_I2 for % ET_R8 for # etc.)
; pToken->dsc.id.charFirst = [0..25] if 1st letter is [A..Z]
; used to determine default type for implicit variable references
; pToken->dsc.id.vmFlags has one or more of the following bits set:
; FVI_FNNAME if id began with FN
; (Even though MakeVariable ignores value of FVI_FNNAME in
; mkVar.flags, we set it for parser's own internal use).
; pToken->dsc.id.lexFlags has one or more of the following bits set:
; FLX_hasPeriod if id has '.' in it
; lit:
; pToken->class = CL_LIT
; pToken->dsc.lit.type = type of value (ET_I2, ET_SD etc.)
; pToken->dsc.lit.litType = type of literal (LIT_I2, LIT_H2, LIT_J2 etc.)
; pToken->dsc.lit.value.I2 = value of 2 byte integer.
; pToken->dsc.lit.value.I4 = value of 4 byte integer.
; pToken->dsc.lit.value.R4 = value of 4 byte real.
; pToken->dsc.lit.value.R8 = value of 8 byte real.
; pToken->dsc.lit.value.cbStr = number of bytes in string literal.
;
; Algorithm:
; From first character, determine what class the token is,
; and then take class specific action to consume it.
;
;*********************************************************************
cProc FetchToken <PUBLIC,NEAR>, <si>
localV namBuf,CB_IDNAM_MAX+2
localW cbId
localW endOfGO
localW tokFlags
localB firstChar
localB lastChar
localB charType
cBegin FetchToken
DbChkPsStk ;see if this is a new high-water stack
and [psFlags],NOT PSIF_fLexPeriodOk
push ds ;set up es register for string copy
pop es
mov si,[ps.PS_bdpSrc.BDP_pbCur]
SkipWhiteSpace_0:
mov ax,si ;ax -> next source byte to be fetched
sub ax,[ps.PS_bdpSrc.BDP_pb]
mov [di.TOK_oSrc],ax ;pToken->oSrc = ps.bdpSrc.pbCur -
; ps.bdpSrc.pb tells parser where
; token began in source buffer
sub ah,ah ;ax = al (code below assumes ah=0)
jmp short SkipWhiteSpace_1
SkipWhiteSpace:
inc [di.TOK_oSrc] ;Just do this if we know we just read
; a single char
SkipWhiteSpace_1:
lodsb ;al = next source byte
mov dl,al ;save current char in dl
or al,al
js NotASCII2 ;branch if al is not a 7-bit ASCII char
mov bx,CPOFFSET typChar ;bx->table to map char to char-class
xlat BYTE PTR cs:[bx] ;al = ax = character type
NotASCII1:
cmp al,TC_RW
jnb ResWord ;brif this is a 'res word' char
; like +,* etc.
; At this point we have either an alpha, a numeric, white space,
; end-of-line character, or a special character which BASIC doesn't consider
; part of its character set. Dispatch based on the character-class.
; This can result in a numeric literal, id, reserved word, or special char.
; ax = character type, dl = char
;
NotResWord:
mov bx,ax
shl bx,1 ;bx = index into dispatch table
jmp [bx + CaseJmpTbl] ;dispatch to case
ResWord:
;We have a non-alpha-numeric char. Try to map it
; to a reserved word/operator
sub al,TC_RW ;al = ax = IRW (res word index)
mov [di.TOK_rw_iRw],ax ;save it in token descriptor
mov [di.TOK_class],CL_RESWORD
mov [di.TOK_rw_rwf],0 ;res word is not a stmt/func
mov bx,OFFSET CP:mpIRWtoIOP ;bx -> table for mapping RW to IOP
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -