prslex.asm

来自「[随书类]Dos6.0源代码」· 汇编代码 · 共 1,428 行 · 第 1/3 页
ASM
1,428 行
	TITLE	prslex.asm - Parser's Lexical Analyzer (token fetcher)

;=============================================================================
; prslex.asm - Parser's Utility Functions
;
; Copyright <C> 1985, Microsoft Corporation
;
; Purpose:
;	Contains QBI Lexical Analyzer Functions
;
;
;=============================================================================

	include version.inc
	PRSLEX_ASM = ON
	includeOnce	architec
	includeOnce	context
	includeOnce	exint		;needed for CoR4R8
	includeOnce	heap
	includeOnce	names
	includeOnce	parser
	includeOnce	prstab
	includeOnce	prsirw
	includeOnce	psint
	includeOnce	qbimsgs
	includeOnce	rtinterp
	includeOnce	txtmgr
	includeOnce	variable	;for FVI_FNNAME [3]

	assumes DS,DGROUP
	assumes SS,DGROUP
	assumes ES,NOTHING


sBegin	DATA

	extrn	$i8_inpbas:byte		; inpbas now supported in AltMath


;List of bytes which, when they follow digits, mean its not an I2
tbFloat	label byte
	db	'.' - '0'
	db	'e' - '0'
	db	'E' - '0'
	db	'd' - '0'
	db	'D' - '0'
	db	'&' - '0'
	db	'!' - '0'
	db	'#' - '0'
	db	'%' - '0'  ;even though % means integer, its not fast-path
			   ;through the code (would slow down typical cases
			   ;to check for % in TryI2)
CB_tbFloat EQU $-tbFloat

sEnd	DATA

sBegin	CP
assumes CS,CP

QUOTE	=	022H			;quote char (")

; It is assumed that all classes of characters < TC_PERIOD are valid
; in identifiers.

OrdConstStart	0
OrdConst	TC_UCASE	;* Upper Case Alpha
OrdConst	TC_LCASE	;* Lower Case Alpha
OrdConst	TC_NUM		;* numeric (or .)
OrdConst	TC_PERIOD	;* "." (record separator / number / part of id)
OrdConst	TC_NEWLINE	;* end-of-line
OrdConst	TC_WHITE	;* white space (tab or space)
OrdConst	TC_SPECIAL	;* special character
OrdConst	TC_PRINT	;* '?' (short hand for PRINT)
OrdConst	TC_RW	;* special character reserved word

typChar LABEL	BYTE
	DB	TC_NEWLINE ; NUL
	DB	TC_SPECIAL ; SOH
	DB	TC_SPECIAL ; STX
	DB	TC_SPECIAL ; ETX
	DB	TC_SPECIAL ; EOT
	DB	TC_SPECIAL ; ENQ
	DB	TC_SPECIAL ; ACK
	DB	TC_SPECIAL ; BEL
	DB	TC_SPECIAL ; BS
	DB	TC_WHITE   ; HT
		;Tabs are expanded to spaces in GetLineBd.
		;Don't be tempted to do it at lex time because
		;if ReParse, source buffer is copied to pcode.
	DB	TC_NEWLINE ; LF
	DB	TC_SPECIAL ; VT
	DB	TC_SPECIAL ; FF
	DB	TC_NEWLINE ; CR
	DB	TC_SPECIAL ; SO
	DB	TC_SPECIAL ; SI
	DB	TC_SPECIAL ; DLE
	DB	TC_SPECIAL ; DC1
	DB	TC_SPECIAL ; DC2
	DB	TC_SPECIAL ; DC3
	DB	TC_SPECIAL ; DC4
	DB	TC_SPECIAL ; NAK
	DB	TC_SPECIAL ; SYN
	DB	TC_SPECIAL ; ETB
	DB	TC_SPECIAL ; CAN
	DB	TC_SPECIAL ; EM
	DB	TC_SPECIAL ; SUB
	DB	TC_SPECIAL ; ESC
	DB	TC_SPECIAL ; FS
	DB	TC_SPECIAL ; GS
	DB	TC_SPECIAL ; RS
	DB	TC_SPECIAL ; US

	DB	TC_WHITE ; spc
	DB	TC_RW + IRW_EtSingle ; !
	DB	TC_RW + IRW_DQuote ; "
	DB	TC_RW + IRW_Lbs ; #
	DB	TC_RW + IRW_EtString ; $
	DB	TC_RW + IRW_EtInteger ; %
	DB	TC_RW + IRW_EtLong ; &
	DB	TC_RW + IRW_SQuote ; '
	DB	TC_RW + IRW_LParen ; (
	DB	TC_RW + IRW_RParen ; )
	DB	TC_RW + IRW_Mult ; *
	DB	TC_RW + IRW_Add ; +
	DB	TC_RW + IRW_Comma ; ,
	DB	TC_RW + IRW_Minus ; -
	DB	TC_PERIOD ; .
	DB	TC_RW + IRW_Div ; /
	DB	TC_NUM ; 0
	DB	TC_NUM ; 1
	DB	TC_NUM ; 2
	DB	TC_NUM ; 3
	DB	TC_NUM ; 4
	DB	TC_NUM ; 5
	DB	TC_NUM ; 6
	DB	TC_NUM ; 7
	DB	TC_NUM ; 8
	DB	TC_NUM ; 9
	DB	TC_RW + IRW_Colon ; :
	DB	TC_RW + IRW_SColon ; 
	DB	TC_RW + IRW_LT ; <
	DB	TC_RW + IRW_EQ ; =
	DB	TC_RW + IRW_GT ; >
	DB	TC_PRINT ; ?

	DB	TC_SPECIAL ; @
	DB	TC_UCASE ; A
	DB	TC_UCASE ; B
	DB	TC_UCASE ; C
	DB	TC_UCASE ; D
	DB	TC_UCASE ; E
	DB	TC_UCASE ; F
	DB	TC_UCASE ; G
	DB	TC_UCASE ; H
	DB	TC_UCASE ; I
	DB	TC_UCASE ; J
	DB	TC_UCASE ; K
	DB	TC_UCASE ; L
	DB	TC_UCASE ; M
	DB	TC_UCASE ; N
	DB	TC_UCASE ; O
	DB	TC_UCASE ; P
	DB	TC_UCASE ; Q
	DB	TC_UCASE ; R
	DB	TC_UCASE ; S
	DB	TC_UCASE ; T
	DB	TC_UCASE ; U
	DB	TC_UCASE ; V
	DB	TC_UCASE ; W
	DB	TC_UCASE ; X
	DB	TC_UCASE ; Y
	DB	TC_UCASE ; Z
	DB	TC_RW + IRW_LParen ; map '[' to '(' for BASICA compatibility
	DB	TC_RW + IRW_Idiv ; \
	DB	TC_RW + IRW_RParen ; map ']' to ')' for BASICA compatibility
	DB	TC_RW + IRW_Pwr ; ^
	DB	TC_SPECIAL; _ (underscore)

	DB	TC_SPECIAL ; `
	DB	TC_LCASE ; a
	DB	TC_LCASE ; b
	DB	TC_LCASE ; c
	DB	TC_LCASE ; d
	DB	TC_LCASE ; e
	DB	TC_LCASE ; f
	DB	TC_LCASE ; g
	DB	TC_LCASE ; h
	DB	TC_LCASE ; i
	DB	TC_LCASE ; j
	DB	TC_LCASE ; k
	DB	TC_LCASE ; l
	DB	TC_LCASE ; m
	DB	TC_LCASE ; n
	DB	TC_LCASE ; o
	DB	TC_LCASE ; p
	DB	TC_LCASE ; q
	DB	TC_LCASE ; r
	DB	TC_LCASE ; s
	DB	TC_LCASE 
	DB	TC_LCASE ; u
	DB	TC_LCASE ; v
	DB	TC_LCASE ; w
	DB	TC_LCASE ; x
	DB	TC_LCASE ; y
	DB	TC_LCASE ; z
	DB	TC_SPECIAL ; {
	DB	TC_SPECIAL ; |
	DB	TC_SPECIAL ; }
	DB	TC_SPECIAL ; ~
	DB	TC_SPECIAL ; DEL




subttl	FindRw

;================ External Procedures Referenced by this module ========


;***************************************************************************
; NEAR BOOL FindRw(pToken, pSym, cbSym, fStr)
; Purpose:
;	Search the reserved word table for a specified symbol.
;
; Entry:
;	'pToken' points to descriptor for current token being fetched
;	'pSym' points to the 1st byte of the symbol to search for.
;	'cbSym' = number of bytes in symbol (always assumed to be > 1)
;	'fStr' is true if the symbol was terminated by "$".
;	[ES] = [DS]
;	NOTE: This assumes reserved word table is ordered ALPHABETICALLY
;
; Exit:
;	If the reserved word was not found,
;	   returns with carry clear
;	else
;	   returns with carry set
;	The fields of the token structure are filled in with the description
;	of the reserved word.
;	[11]In EB FindRw may also determine that the string passed to it is a
;	command equivalent name in which case the token structure is filled
;	in describing it.
;
;*******************************************************************************
; Register allocation within procedure:
;	si is always pointing to next byte in reserved word table
;	di = temporary copy of pbSym
;	cx = temporary copy of cbSym

cProc	FindRw	<PUBLIC,NEAR>, <si,di>
	ParmW	pToken			;ptr to token being built
	ParmW	pSym			;ptr to 1st byte of name to search for
	ParmW	cbSym			;byte count of name to search for
	ParmB	fStr			;TRUE if name ends with '$'

	localW	nStmts			;num statements that begin with
					; this reserved word

cBegin	FindRw
	mov	si,[pSym]		;si points to 1st letter of id
	lodsb				;al = 1st letter of id
	dec	[cbSym] 		;decrement byte count for same reason
	sub	al,'A'			;al = (0..25) for (A..Z) 
	cbw	
	cmp	al,'Z'-'A'		
	ja	WordNotFound1		;brif first letter can not start RW
	shl	ax,1			;ax = word index into tRw
	xchg	si,ax			;si = index into tRw
	mov	si,WORD PTR cs:tRw[si]	;si->res word table for 1st letter of id

	;======================================================
	; NOTE: Beginning of block which has DS->CP (not DGROUP)
	;	If any static variables need be accessed in this
	;	block, access them with an ES override.
	;======================================================
	push	cs
	pop	ds			;ds = Code Segment (CP)
	assumes DS,CP

	lodsw				;[ax] = res word index for 1st entry
	mov	bx,ax			;iRw = res word index for 1st entry
	mov	cx,[cbSym]		;initialize
	jmp	SHORT NextEntry

;Register usage:
; dx points to next entry in reserved word table
; bx contains the res word index for current symbol
; si points into current reserved word table entry
; 
CbsInWord:				
	lodsw				;al = #bytes of attributes
					;ah = #bytes in reserved word
	jmp	SHORT GotCbs		

TryNextWord:
	mov	cx,[cbSym]		;cx = byte count for that name
TryNextWordCX:
	inc	bx
	mov	si,dx			;si points to start of next entry
NextEntry:
	lodsb				;al = size of next entry
					; high 4 bits = #bytes in name
					; low 4 bits = #bytes of attributes
	or	al,al
	jz	WordNotFound		;brif end of this res word table
	inc	al			
	jz	CbsInWord		;brif counts are in following word
	dec	al			

	mov	ah,al			;ah = #bytes in name * 16
	and	al,0FH			;al = #bytes of attributes
	shr	ah,1			;ah = #bytes in name * 8
	shr	ah,1			;ah = #bytes in name * 4
	shr	ah,1			;ah = #bytes in name * 2
	shr	ah,1			;ah = #bytes in name
GotCbs:
	mov	dl,ah			;dl = #bytes in name
	add	dl,al			;dl = #bytes in name & attributes
	sub	dh,dh			;dx = #bytes in name & attributes
	add	dx,si			;dx->next entry
	cmp	ah,cl
	jne	TryNextWordCX		;brif name lengths not equal
	mov	di,[pSym]
	inc	di			;di->2nd byte of name to search for
	rep cmpsb			;compare pSym with res word entry
	jc	TryNextWord		;brif pSym is alphabetically > this
					;entry in res word tbl.
	jne	WordNotFound		;brif names are not identical
					;and pSym is alphabetically < this
					;entry in res word tbl.
	lodsb				;al = flags byte
	sub	ah,ah			;ah = STRING:FALSE
	test	al,RWF_STR
	je	NotStr
	mov	ah,1			;ah = STRING:TRUE
NotStr:
	cmp	ah,[fStr]
	jne	TryNextWord		;brif one name ends with $ and
					; the other doesn't (no match)

;At this point, we have found a match in the reserved word table.
;
	push	ss
	pop	ds			;restore DS = DGROUP
	assumes DS,DGROUP
	;======================================================
	; NOTE: End of block which has DS->CP
	;======================================================
	mov	di,[pToken]		;di -> token descriptor being built
	mov	[di.TOK_class],CL_RESWORD
	mov	[di.TOK_rw_rwf],ax	;pToken->dsc.rwf = res word flags
	test	al,RWF_OPERATOR
	mov	ax,UNDEFINED
	je	NotOper 		;brif res word is not an operator
	lods	BYTE PTR cs:[si]	;al = IOP_xxx index for res word
	sub	ah,ah			;ax = IOP_xxx index for res word
NotOper:
	mov	[di.TOK_rw_iOperator],ax;save it in pToken
	mov	[di.TOK_rw_iRw],bx	;save res word's unique index
	mov	[di.TOK_rw_pArgs],si	;save ptr to func/stmt args
	stc				;tell caller reserved word was found
	jmp	SHORT FindRwExit	;exit function

WordNotFound:
	push	ss
	pop	ds			;restore DS = DGROUP
WordNotFound1:
	clc				;tell caller reserved word not found
FindRwExit:
cEnd	FindRw


subttl	FetchToken

;*********************************************************************
; VOID PLM NEAR FetchToken(di:pToken)
;
; Purpose:
;  This is the lexical analyzer for BASIC.  It scans
;  ASCII text and sets fields in the structure pToken
;  which identify the scanned token.
;  It filters out white space such as tabs, newlines, and spaces.
;
; Entry:
;  di (pToken) points to the receiving token descriptor structure.
;	ps.bdpSrc is a buffer descriptor which holds current line being parsed
;
; Exit:
;  The structure pointed to by pToken is filled in
;     id:
;	 pToken->class = CL_ID
;	 pToken->dsc.id.oNam = ONamOfPbCb offset for the identifier.
;	 pToken->dsc.id.oTyp = explicit type which followed
;	    identifier (ET_IMP for implicit, ET_I2 for % ET_R8 for # etc.)
;	 pToken->dsc.id.charFirst = [0..25] if 1st letter is [A..Z]
;	    used to determine default type for implicit variable references
;	 pToken->dsc.id.vmFlags has one or more of the following bits set:
;		FVI_FNNAME if id began with FN
;		(Even though MakeVariable ignores value of FVI_FNNAME in
;		 mkVar.flags, we set it for parser's own internal use).
;	 pToken->dsc.id.lexFlags has one or more of the following bits set:
;		FLX_hasPeriod if id has '.' in it
;     lit:
;	 pToken->class = CL_LIT
;	 pToken->dsc.lit.type = type of value (ET_I2, ET_SD etc.)
;	 pToken->dsc.lit.litType = type of literal (LIT_I2, LIT_H2, LIT_J2 etc.)
;	 pToken->dsc.lit.value.I2 = value of 2 byte integer.
;	 pToken->dsc.lit.value.I4 = value of 4 byte integer.
;	 pToken->dsc.lit.value.R4 = value of 4 byte real.
;	 pToken->dsc.lit.value.R8 = value of 8 byte real.
;	 pToken->dsc.lit.value.cbStr = number of bytes in string literal.
;
; Algorithm:
;	From first character, determine what class the token is,
;	and then take class specific action to consume it.
;
;*********************************************************************

cProc	FetchToken <PUBLIC,NEAR>, <si>
	localV	namBuf,CB_IDNAM_MAX+2	
	localW	cbId
	localW	endOfGO
	localW	tokFlags
	localB	firstChar
	localB	lastChar
	localB	charType

cBegin	FetchToken
	DbChkPsStk			;see if this is a new high-water stack
	and	[psFlags],NOT PSIF_fLexPeriodOk
	push	ds			;set up es register for string copy
	pop	es
	mov	si,[ps.PS_bdpSrc.BDP_pbCur]
SkipWhiteSpace_0:
	mov	ax,si			;ax -> next source byte to be fetched
	sub	ax,[ps.PS_bdpSrc.BDP_pb]
	mov	[di.TOK_oSrc],ax	;pToken->oSrc = ps.bdpSrc.pbCur -
					; ps.bdpSrc.pb	tells parser where
					; token began in source buffer
	sub	ah,ah			;ax = al (code below assumes ah=0)
	jmp	short SkipWhiteSpace_1
SkipWhiteSpace:
	inc	[di.TOK_oSrc]		;Just do this if we know we just read
					;  a single char
SkipWhiteSpace_1:
	lodsb				;al = next source byte
	mov	dl,al			;save current char in dl
	or	al,al
	js	NotASCII2		;branch if al is not a 7-bit ASCII char

	mov	bx,CPOFFSET typChar	;bx->table to map char to char-class
	xlat	BYTE PTR cs:[bx]	;al = ax = character type
NotASCII1:
	cmp	al,TC_RW
	jnb	ResWord			;brif this is a 'res word' char
					; like +,* etc.
; At this point we have either an alpha, a numeric, white space,
; end-of-line character, or a special character which BASIC doesn't consider
; part of its character set.  Dispatch based on the character-class.
; This can result in a numeric literal, id, reserved word, or special char.
; ax = character type, dl = char
;
NotResWord:
	mov	bx,ax
	shl	bx,1			;bx = index into dispatch table
	jmp	[bx + CaseJmpTbl]	;dispatch to case

ResWord:
	;We have a non-alpha-numeric char.  Try to map it
	; to a reserved word/operator
	
	sub	al,TC_RW		;al = ax = IRW (res word index)
	mov	[di.TOK_rw_iRw],ax	;save it in token descriptor
	mov	[di.TOK_class],CL_RESWORD
	mov	[di.TOK_rw_rwf],0	;res word is not a stmt/func
	mov	bx,OFFSET CP:mpIRWtoIOP ;bx -> table for mapping RW to IOP
prslex.asm - 源码说明

本页面展示了「[随书类]Dos6.0源代码」中的 prslex.asm 源码文件，采用汇编编程语言编写，共 1,428 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Dos相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?