📄 prslex.asm
字号:
xlat BYTE PTR cs:[bx] ;al = IOP (operator) index for res word
;ax = IOP since ah is still 0
or al,al
js NotOperator ;brif this res word char isn't an
; operator
mov [di.TOK_rw_iOperator],ax
jmp UpdateSrcPtr ;save si in ps.PS_bdpSrc.BDP_pbCur and exit
NotASCII2:
mov al,TC_SPECIAL ;all chars >127 are special chars
jmp SHORT NotASCII1
;The special character we parsed is not an operator,
; Try '&<number>' or "<quoted string literal>"
; dl = current source char, its not an operator
NotOperator:
mov [di.TOK_rw_iOperator],UNDEFINED
cmp dl,'&'
je GotLit ;brif numeric literal like &Hffff
cmp dl,QUOTE
jne J_UpdateSrcPtr ;branch if current char is not "
mov dx,si ;dx points to 1st char in literal
LoopInString:
lodsb ;al = next byte of source
cmp al,QUOTE
je EndOfString
or al,al
jne LoopInString ;brif we're not at end-of-line
dec si ;so next call to FetchToken will
; re-fetch end-of-line
dec dx ;adjust dx so length comes out right
EndOfString:
mov ax,si ;ax points beyond last char of literal
sub ax,dx ;ax = # chars since start of token
dec ax ;don't count the last double quote
mov WORD PTR [di.TOK_lit_value_cbStr],ax
mov [di.TOK_class],CL_LIT
mov [di.TOK_lit_type],ET_SD
mov [di.TOK_lit_litType],LIT_STR
mov WORD PTR ([di.TOK_lit_errCode]),0 ;set errCode and flags field
.errnz TOK_lit_flags - TOK_lit_errCode - 1
J_UpdateSrcPtr:
jmp UpdateSrcPtr ;save si in ps.PS_bdpSrc.BDP_pbCur and exit
sEnd CP
sBegin DATA
CaseJmpTbl LABEL WORD
dw Got_TC_ALPHA ;Upper Case
dw Got_TC_ALPHA ;Lower Case Alpha
dw GotLit ;0-9
dw Got_TC_PERIOD ;.
dw Got_TC_NEWLINE ;End-of-line Character
dw SkipWhiteSpace ;White Space Character
dw Got_TC_SPECIAL ;Special Character
dw Got_TC_PRINT ;? = shorthand for "PRINT"
sEnd DATA
sBegin CP
assumes CS,CP
;got a "." at start of token. It may be a number, or a record separator
Got_TC_PERIOD:
mov al,[si]
cmp al,'0'
jb Got_TC_SPECIAL ;brif char after "." isn't 0-9
cmp al,'9'
ja Got_TC_SPECIAL ;brif char after "." isn't 0-9
;0-9 or .
GotLit:
;Call ScanLit, trapping runtime overflow errors
mov WORD PTR [di.TOK_lit_errCode],0 ;zero errCode and flags
.errnz TOK_lit_flags - TOK_lit_errCode - 1
PUSHI ax,<CODEOFFSET ScanLit>
call CallRtTrap_CODE ; al = 0 if no error, else ER_OV
; in QBJ version ax may be error
; for bad KANJI constant
mov [di.TOK_lit_errCode],al ;return error code
jmp UpdateSrcPtr ;save si in ps.PS_bdpSrc.BDP_pbCur and exit
;End-of-line Character
Got_TC_NEWLINE:
mov [di.TOK_class],CL_RESWORD
mov [di.TOK_rw_iRw],IRW_NewLine
mov [di.TOK_rw_rwf],0 ;res word is not an operator/stmt/func
mov [di.TOK_rw_iOperator],UNDEFINED
jmp ExitFetchToken ;don't save new si in
;ps.PS_bdpSrc.BDP_pbCur so next call to
;FetchToken will get the same token
;until a new line is read into ps
;Special Character
Got_TC_SPECIAL:
mov [di.TOK_class],CL_UNKNOWNCHAR
mov [di.TOK_unknownChar_unknownChar],dl
cmp dl,'_'
jne NotLineCont ;brif not underscore
FLoadActive
je NotLineCont ;brif not loading a file
dec si ;don't include _ in logical line
mov bx,dataOFFSET ps.PS_bdpSrc ;pass pbd in bx
sub si,[bx.BD_pb] ;si = #bytes in buffer before _
mov cx,si ;pass cbAppend in cx
call GetLineBd ;append next physical line to current
; buffer
SkipWhiteSpace_2:
mov [di.TOK_oSrc],si ;update current offset
add si,[ps.PS_bdpSrc.BDP_pb] ;si = ptr to next byte after _
sub ah,ah ;ax = al (SkipWhiteSpace_1 assumes ah=0)
jmp SkipWhiteSpace_1
NotLineCont:
jmp UpdateSrcPtr ;save si in ps.PS_bdpSrc.BDP_pbCur and exit
;While scanning a token, we found a FN followed by a space.
;In all versions of BASIC, FN xxx is treated the same as FNxxx.
;Squeeze source of ' 'out of pcode buffer
;because we need to call ONamOfPbCb with case sensitive FNxxx
;BdShiftLeft((bd *)&ps.bdpSrc, oSpace, 1)
;
FNspace:
pop di ;restore di points to token descriptor
PUSHI ax,<dataOFFSET ps.PS_bdpSrc>
mov ax,si
dec ax ;ax points to space
sub ax,[ps.PS_bdpSrc.BDP_pb] ;ax = offset to space
push ax ;pass oSpace
PUSHI ax,1 ;shift 1 byte left
call BdShiftLeft ;causes no heap movement
mov si,[di.TOK_oSrc] ;si points to start of FNxxx
jmp SkipWhiteSpace_2 ;re-scan token
;Convert '?' into "PRINT"
Got_TC_PRINT:
push di ;save ptr to result token descriptor
lea di,[namBuf] ;di -> temporary name buffer
mov ax,'RP' ;namBuf = "PRINT"
stosw
mov ax,'NI'
stosw
mov al,'T'
stosb
mov [lastChar],al ;anything but '$'
inc si ;compensate for dec si below
mov ax,5 ;[ax] = #chars in "PRINT"
pop di ;di points to result token descriptor
jmp GotId
;Upper and Lower Case Alpha
Got_TC_ALPHA:
mov cx,CB_IDNAM_MAX+1
sub ax,ax
mov [di.TOK_id_lexFlags],al
mov [endOfGO],ax ;assume we're not looking at 'GO'
mov [tokFlags],ax ;default flags = 0
push di ;save ptr to result token descriptor
lea di,[namBuf] ;di -> temporary name buffer
mov [di],ax ;init first two bytes of namBuf
; to 0 so the "cmp [namBuf],'NF'"
; below will always work.
mov al,dl ;al = source char
IdLoop0:
mov bx,CPOFFSET typChar ;bx->table to map char to char-class
GotInternational:
;Loop to convert id to upper case and copy to temp buffer
; In the case of GO TO or GO SUB, we copy the entire reserved word
; into namBuf, squeezing out any white space after 'GO'. The goal
; is to make people who use GO TO pay more than people who use GOTO.
; At this point,
; al = UCASE(current source char)
; si points to next source char
; di points to destination of next char of id
; bx points to typChar table
; cx = max number of chars left in identifier
; es = ds = DGROUP
;
IdLoop:
and al,0DFh ;force to upper case
;Note that it is OK to do this even
;if char is numeric, since no res words
;contain numerics, if it is numeric,
;it will never match.
stosb ;store UCASE(source char) in temp buf
lodsb ;al = next source char
dec cx
jcxz IdTooLong ;branch if name length exceeded
IdLoop1:
or al,al ;test for non-ascii char ( > 127)
js GotNonASCII ;branch if we got a non ASCII char
mov dl,al ;save current char in dl
xlat BYTE PTR cs:[bx] ;al = ax = character type
cmp al,TC_PERIOD
xchg ax,dx ;dl = char type, al=char
jb IdLoop ;brif got an alpha or numeric
jne EndOfId ;brif didn't get a "."
test [psFlags],PSIF_fPeriodOk OR PSIF_fLexPeriodOk
jne IdLoop ;brif "." can be part of id token
cmp [nambuf],'NF'
je IdLoop ;brif FNxxx.xxx (def fn ids can have
; periods in them)
GotNonASCII:
mov dl,TC_SPECIAL
;al = char which terminated id (may be '$','#' etc.)
;dl = type of last char (TC_xxx)
;si points beyond char which terminated string
;
EndOfId:
mov [lastChar],al ;save last char
mov [charType],dl ;save type of terminator
mov ax,CB_IDNAM_MAX+1
sub ax,cx ;ax = # chars in id
mov dx,[nambuf]
cmp al,2
jne NotGO ;branch if id can't be 'GO'
cmp dx,'OG' ;in namBuf, least sig byte comes last,
; so GO == 'OG'
jne NotGO ;branch if id isn't 'GO'
cmp [lastChar],' '
jne NotGO ;branch if not 'GO' followed by space
mov [endOfGO],si ;save ptr to end of 'GO'
;Skip white space, then try to find GO TO or GO SUB
SkipSpc:
lodsb
cmp al,' '
jne IdLoop1
jmp SHORT SkipSpc
;Got an id which was too long - generate tricky error message.
;If we didn't call ParseErr before PErrMsg_AX, PErrMsg_AX would call NtEndStmt
;which would recursively call FetchToken, which would be bad news.
;
IdTooLong:
pop bx ;bx points to result token descriptor
; for error column reporting
push bx ;re-save it on stack
mov ax,PSERR_fAlert + MSG_IdTooLong
call IdErr
mov cx,1 ;only pass 40 byte name to ONamOfPbCb
mov al,[si]
jmp SHORT EndOfId ; it can't handle anything longer
;Got 'FN' as start of name
;di points beyond of name in nambuf, ax=length of name, dx = [nambuf]
FNid:
cmp al,2
jne NotFN2 ;brif we got more than just FN
cmp [lastChar],' '
jne IllegalFNid ;brif FN not followed by ' '
; will get error like "Expected FNid"
jmp FNspace ;treat FN x like FNx
IllegalFNid:
pop di ;di points to result token descriptor
mov bx,di
mov ax,PSERR_fAlert + MSG_BadId
call IdErr
jmp SkipWhiteSpace_0 ;re-scan token
;Got FNxxx, not just FN
NotFN2:
or [tokFlags],FVI_FNNAME ;set FVI_FNNAME bit in tok.id.flags
mov dl,[namBuf+2] ;dl = UCASE(1st char beyond FN)
cmp dl,'A'
jae NotGO_Cont ;brif got FN<letter>xxx
cmp dl,('.' AND 0DFh) ;test if char was a '.' before
; anding with '.'
jne IllegalFNid ;brif got FN<digit>xxx
mov dl,26+'A' ;'.' maps to 26 (Z+1)
; FN.xxx is always single precision
jmp short NotGO_Cont
;di points beyond of name in nambuf, ax=length of name, dx = [nambuf]
NotGO:
cmp dx,'NF' ;test for 'FN' (FN == 'NF' because
; in namBuf, least sig byte comes last)
je FNid ;brif got 'FN'
;dl=1st letter of name (or 3rd letter of FNname)
;ax=length of name, [lastChar]=terminator
NotGO_Cont:
pop di ;di points to result token descriptor
mov [firstChar],dl
;di points to token descriptor, ax=length of name, [lastChar]=terminator
GotId:
mov [cbId],ax ;save byte count of id
cmp al,1
je NotResWordId ;1 letter id can't be res word
cmp [lastChar],'.'
je NotResWordId ;res word can't be followed by period
push di ;pass ptr to token descriptor to FindRw
lea bx,namBuf
push bx ;pass ptr to UCASE(1st char)
push ax ;pass char count
sub ax,ax ;fStr = FALSE
cmp [lastChar],'$'
jne CallFindRw ;brif id terminator <> '$'
inc ax ;fStr = TRUE
CallFindRw:
push ax ;pass fStr
call FindRw
jnc NotResWordId
;Got a valid reserved word
cmp [lastChar],'$'
je J1_UpdateSrcPtr ;brif res word not terminated by '$'
dec si ;next FetchChar will refetch terminator
J1_UpdateSrcPtr:
jmp short UpdateSrcPtr ;save si in ps.PS_bdpSrc.BDP_pbCur and exit
;[tokFlags] = tok.id.flags
NotResWordId:
mov al,[firstChar]
sub al,'A' ;map A..Z to 0..25, period to 26
mov [di.TOK_id_charFirst],al;so parser can set default type
; Also used for DEFxxx A-X
mov ax,[tokFlags]
mov [di.TOK_id_vmFlags],ax
mov cx,[endOfGO]
jcxz NotGoId ;brif we didn't parse 'go id'
mov si,cx ;si -> 1st byte after 'go '
mov [cbId],2
jmp SHORT GotImplicitId
NotGoId:
;following code assumes IRW_EtInteger...IRW_<last expl. type char>
; are contiguous
mov al,[charType]
sub al,TC_RW + IRW_EtInteger
cmp al,CBASETYPEMAX - 1 ;cmp al to number of explicit types
jb GotExplicitType
GotImplicitId:
dec si ;next FetchChar will refetch terminator
mov al,ET_IMP ;can't be $,%,&,!, or #
jmp SHORT SaveType
GotExplicitType:
add al,ET_I2
SaveType:
sub ah,ah
mov [di.TOK_id_oTyp],ax
mov ax,[ps.PS_bdpSrc.BDP_pb]
mov [ps.PS_bdpSrc.BDP_pbCur],si ;NOTE: ONamOfPbCb can cause heap to move
; so save current text pointer
add ax,[di.TOK_oSrc] ;ax points to 1st byte of id
; (parm to ONamOfPbCb)
mov cx,[cbId] ;#bytes in id - also parm to ONamOfPbCb
test [ps.PS_flags],PSF_fParseExp
jne DontSaveCase ;brif called to parse a Watch Expression
test [psFlags],PSIF_NoCaseChg
jne DontSaveCase
cmp [grs.GRS_fDirect],FALSE
je SaveCase ;brif not parsing direct mode stmt
DontSaveCase:
or ch,80h ;highbit says don't change case
; of existing namtbl id, so user
; can type alpha in the command window
; and it doesn't change Alpha to
; alpha in his list window.
SaveCase:
call ONamOfPbCb ;nammgr returns ax = oNam, dl = flags
je ONamErr ;brif ONamOfPbCb failed
mov [di.TOK_id_oNam],ax ;save nammgr's handle for the name
cmp [lastChar],'.'
je IdPeriod
NotIdPeriod:
mov [di.TOK_class],CL_ID
mov al,[lastChar]
mov [di.TOK_id_termChar],al
jmp SHORT ExitFetchToken
;save si in ps.PS_bdpSrc.BDP_pbCur and exit
UpdateSrcPtr:
mov [ps.PS_bdpSrc.BDP_pbCur],si
ExitFetchToken:
cEnd FetchToken
ONamErr:
call ParseErrOm ;tell ParseLine to return out-of-mem err
J1_Got_TC_NEWLINE:
jmp Got_TC_NEWLINE ;return new-line - ends parsing of line
;ax = oNam of id which was terminated with "."
IdPeriod:
test [psFlags],PSIF_fNoPeriod ;don't stop for periods
jne NotIdPeriod ;brif scanning id which cannot have .
test dl,NM_fAS ;dl still set from ONamOfPbCb call
jne NotIdPeriod ;brif id has been seen in "x AS" clause
; in which case x.y is 3 tokens
;set PSIF_fLexPeriodOk so IdLoop won't stop for periods
;set PSIF_fLineHasPeriodId so ParseLine knows line has 'a.b' id in line
; so it will emit an opNoType at end of line - helps txtmgr
or [psFlags],PSIF_fLineHasPeriodId + PSIF_fLexPeriodOk
or [di.TOK_id_lexFlags],FLX_hasPeriod
mov si,[ps.PS_bdpSrc.BDP_pbCur] ;si points to '.' after id
mov cx,CB_IDNAM_MAX+2
mov ax,[cbId]
sub cx,ax ;cx = num bytes left
push di ;save token ptr back on stack
; so we can jump back into IdLoop
lea di,nambuf
add di,ax ;di points to destination for next byte
mov al,'.'
push ds
pop es ;es = DGROUP
jmp IdLoop0
;=====================================================================
; T O K E N S C A N N I N G F U N C T I O N S
;
; The idea is to maintain a circular queue of tokens for "Look Ahead".
; There are 4 offsets into this circular queue of particular interest:
; pTokLast points to the most recent token scanned by 'FetchToken'.
; pTokPeek points to the last token returned by 'Peek1Tok' or
; 'PeekNextTok'.
; pTokScan points to the last token returned by 'ScanTok'.
; It is the chronologically oldest token we care
; about, or the opposite end of the queue from pTokLast.
; pTokLastConsumed points to the last token consumed by 'ScanTok'.
; This token was the previous value of pTokScan. It is useful
; in some special cases for checking for syntax errors and
; generating the correct column offset.
;
;======================================================================
;*********************************************************************
; VOID NEAR ScanTok()
;
; Purpose:
; Advance 'pTokScan' to point to the next non white-space token in
; the circular token queue. Note that if PeekNextTok has already
; fetched the token we're interested in, this function does
; very little work (i.e. doesn't need to call FetchToken).
; Exit:
; bx points to current token (same as pTokScan)
;
;*********************************************************************
PUBLIC ScanTok
ScanTok PROC NEAR
inc [cTokScan] ;bump token count so NtParse
; knows to return PR_GoodSyntax
; instead of PR_NotFound
mov ax,[pStateCur]
mov [pStateLastScan],ax ;used for error reporting
mov ax,[pTokScan] ;ax points to current 'scan' token
mov [pTokLastConsumed],ax ;update pointer to last consumed tok
mov bx,ax ;bx points to current 'scan' token
add ax,CB_TOK ;advance to next token
cmp ax,dataOFFSET tLookAhead + (LOOK_AHEAD * CB_TOK)
jb ScanNoWrap ;brif if not wrapped around
mov ax,dataOFFSET tLookAhead;ax points to 1st token descriptor
ScanNoWrap:
mov [pTokScan],ax ;save ptr to new 'scan' token
;bx points to previous token of interest
;ax points to next token of interest
ScanFetch:
cmp [pTokLast],bx
jne ScanGotOne ;brif token we want has already been
; fetched by PeekNextToken
;else circular queue is empty,
; fetch a new entry
mov [pTokLast],ax ;advance pTokLast as well
push di
mov di,ax ;di points to new token
call FetchToken ;fill in token structure di
mov bx,di ;return token ptr in bx
pop di
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -