mal_parser.mx

来自「一个内存数据库的源代码这是服务器端还有客户端」· MX 代码 · 共 1,808 行 · 第 1/3 页
1,808 行
@' The contents of this file are subject to the MonetDB Public License
@' Version 1.1 (the "License"); you may not use this file except in
@' compliance with the License. You may obtain a copy of the License at
@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html
@'
@' Software distributed under the License is distributed on an "AS IS"
@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
@' License for the specific language governing rights and limitations
@' under the License.
@'
@' The Original Code is the MonetDB Database System.
@'
@' The Initial Developer of the Original Code is CWI.
@' Portions created by CWI are Copyright (C) 1997-2007 CWI.
@' All Rights Reserved.
@a M. L. Kersten
@v 1.1
@-
@{
@+ The Parser Implementation
The parser (and its target language) are designed for speed of analysis,
because parsing is a dominant cost factor in applications interfacing with
MonetDB. For the language design it meant that look-ahead and ambiguity
are avoided wherever possible, without compromising readability, and
to ease debugging.

The syntax layout of a MAL program consists of a module name,
a list of include commands, a list of function/ pattern/ command/ factory
definitions and concludes with the statements to be executed as
the main body of the program.  All components are optional.

The program may be decorated with comments, which start with a # and
run till the end of the current line. Comments are retained
in the code block for debugging, but can be removed with an optimizer to reduce space
and interpretation overhead.
@+ The lexical analyzer
The implementation of the lexical analyzer is straightforward:
the input is taken from a client input buffer. 
It is assumed thatthis buffer contains the complete MIL structure to be parsed.@h#ifndef _MAL_PARSER_H#define _MAL_PARSER_H#include "mal_import.h"#define MAXERRORS 250#define CURRENT(c) (c->fdin->buf + c->fdin->pos + c->yycur)#define currChar(X) (*CURRENT(X))#define peekChar(X) (*((X)->fdin->buf + (X)->fdin->pos + (X)->yycur+1))#define nextChar(X) X->yycur++#define prevChar(X) if(X->yycur) X->yycur--mal_export void initParser(void);   /* needed in src/mal/mal.c */mal_export int parseMAL(Client cntxt, Symbol curPrg);mal_export void echoInput(Client cntxt);mal_export void debugParser(int i);mal_export str parseError(Client cntxt, str msg);mal_export void advance(Client cntxt, int length);mal_export void skipSpace(Client cntxt);mal_export void skipToEnd(Client cntxt);mal_export int idLength(Client cntxt);mal_export int stringLength(Client cntxt);mal_export str idCopy(Client cntxt, int len);mal_export str strCopy(Client cntxt, int len);mal_export int cstToken(Client cntxt, ValPtr val);mal_export int charCst(Client cntxt, ValPtr val);mal_export int operatorLength(Client cntxt);mal_export str operatorCopy(Client cntxt, int length);mal_export int keyphrase(Client cntxt, str kw, int length);mal_export int keyphrase1(Client cntxt, str kw);mal_export int keyphrase2(Client cntxt, str kw);mal_export int MALkeyword(Client cntxt, str kw, int length);mal_export int MALlookahead(Client cntxt, str kw, int length);mal_export str lastline(Client cntxt);mal_export long position(Client cntxt);#endif /* _MAL_PARSER_H */@- lexical utilitiesBefore a line is parsed we check for a request to echo it.This command should be executed at the beginning of a parserequest and each time we encounter EOL.@c#include "mal_config.h"#include "mal_parser.h"#include "mal_resolve.h"#include "mal_linker.h"#include "mal_atom.h"       /* for malAtomDefinition(), malAtomArray(), malAtomProperty() */#include "mal_interpreter.h"    /* for showErrors() */#include "mal_instruction.h"    /* for 
pushEndInstruction(), findVariableLength() */#include "mal_namespace.h"#include "mal_utils.h"#define FATALINPUT MAXERRORS+1#define NL(X) ((X)=='\n' || (X)=='\r')void echoInput(Client cntxt){	if (cntxt->listing & LIST_INPUT) { 		char *c = CURRENT(cntxt); 		stream_printf(cntxt->fdout,"#");		while (*c && !NL(*c)) {			stream_printf(cntxt->fdout, "%c", *c++);		}		stream_printf(cntxt->fdout, "\n");	}}INLINE void skipSpace(Client cntxt){	char *s= &currChar(cntxt);	for (;;) {		switch (*s++) {		case ' ':		case '\t':		case '\n':		case '\r':			nextChar(cntxt);			break;		default:			return;		}	}}INLINE void advance(Client cntxt, int length){	cntxt->yycur += length;	skipSpace(cntxt);}@-The most recurring situation is to recognize identifiers.This process is split into a few steps to simplify subsequentconstruction and comparison.IdLength searches the end of an identifier without changingthe cursor into the input pool.IdCopy subsequently prepares a GDK string for inclusion in theinstruction datastructures.@cshort opCharacter[256];short idCharacter[256];short idCharacter2[256];void initParser(){	int i;	for (i = 0; i < 256; i++){		idCharacter2[i]= isalpha(i) || isdigit(i);		idCharacter[i] = isalpha(i);	}	for (i = 0; i < 256; i++)	switch(i){	case '-': case '!': case '\\': case '$': case '%':	case '^': case '*': case '~': case '+': case '&':	case '|': case '<': case '>': case '=': case '/':	case ':': 		opCharacter[i]=1; 	}	idCharacter[TMPMARKER]=1;	idCharacter2[TMPMARKER]=1;}#undef isdigit#define isdigit(X)  ((X)>='0' && (X)<='9')int idLength(Client cntxt){	str s,t;	skipSpace(cntxt);	s = CURRENT(cntxt);	t=s;	if (!idCharacter[(int) (*s)])		return 0;	s++;	while (idCharacter2[(int) (*s)] ) 		s++;	return s-t;}@-Simple type identifiers can not be marked with a type variable.@cint typeidLength(Client cntxt){	int l;	str s;	skipSpace(cntxt);	s = CURRENT(cntxt);	if (!idCharacter[(int) (*s)])		return 0;	l = 1;	s++;	idCharacter[TMPMARKER] = 0;	while (idCharacter[(int) (*s)] || isdigit(*s)) {		
s++;		l++;	}	idCharacter[TMPMARKER]=1;	return l;}str idCopy(Client cntxt, int length){	str s= GDKmalloc(length+1);	memcpy(s, CURRENT(cntxt),(size_t) length);	s[length]=0;	advance(cntxt,length);	return s;}int MALkeyword(Client cntxt, str kw, int length){	skipSpace(cntxt);	if (MALlookahead(cntxt, kw, length)) {		advance(cntxt, length);		return 1;	}	return 0;}int MALlookahead(Client cntxt, str kw, int length){	int i;	skipSpace(cntxt);	/* avoid double test or use lowercase only. */	if (currChar(cntxt) == *kw &&		strncmp(CURRENT(cntxt), kw, length) == 0 &&		!idCharacter[(int) (CURRENT(cntxt)[length])] &&		!isdigit((int) (CURRENT(cntxt)[length])) ) {		return 1;	}	 /* check for captialized versions */	for (i = 0; i < length; i++)		if (tolower(CURRENT(cntxt)[i]) != kw[i])			return 0;	if (!idCharacter[(int) (CURRENT(cntxt)[length])] &&		!isdigit((int) (CURRENT(cntxt)[length])) ) {		return 1;	}	return 0;}@-Keyphrase testing is limited to a few characters only(check manually). To speed this up we use a pipelined andinline macros.@cINLINE int keyphrase1(Client cntxt, str kw){	skipSpace(cntxt);	if (currChar(cntxt) == *kw) {		advance(cntxt,1);		return 1;	}	return 0;}INLINE int keyphrase2(Client cntxt, str kw){	skipSpace(cntxt);	if (CURRENT(cntxt)[0] == kw[0] && CURRENT(cntxt)[1] == kw[1]) {		advance(cntxt,2);		return 1;	}	return 0;}INLINE int keyphrase(Client cntxt, str kw,int length){	skipSpace(cntxt);	if( strncmp(CURRENT(cntxt),kw,length)== 0){		advance(cntxt,length);		return 1;	}	return 0;}@-A similar approach is used for string literals.Beware, string lengths returned include thebrackets and escapes. 
They are eaten away in strCopy.We should provide the C-method to split strings andconcatenate them upon retrieval[todo]@cint stringLength(Client cntxt){	int l=0;	int quote =0;	str s;	skipSpace(cntxt);	s = CURRENT(cntxt);	if( *s != '"') 		return 0;	s++;	while( *s ){		if( quote ){ 			l++; 			s++;			quote=0;		} else {			if( *s == '"' ) break;			quote= *s == '\\';			l++;			s++;		}	}	return l+2;}@-Beware, the idcmp routine uses a short cast to compare multiple bytesat once. This may cause problems when the net string length is zero.@cstr strCopy(Client cntxt, int length){	str s;	int i;	i = length<4 ? 4: length;	s = GDKzalloc(i);	if (s == 0) 		GDKfatal("FATAL:strCopy:");	memcpy(s, CURRENT(cntxt) + 1, (size_t) (length - 2) );	mal_unquote(s);	return s;}@-And a similar approach is used for operator names.A lookup table is considered, because it generally isfaster then a non-dense switch.@cint operatorLength(Client cntxt){	int l=0;	str s;	skipSpace(cntxt);	for (s = CURRENT(cntxt); *s; s++) {		if( opCharacter[(int)(*s)] ) 			l++; 		else 			return l;	}	return l;}str operatorCopy(Client cntxt, int length){	return idCopy(cntxt,length);}@-For error reporting we may have to find the start of the previous line,which, ofcourse, is easy given the client buffer.The remaining functions are self-explanatory.@cstr lastline(Client cntxt){	str s = CURRENT(cntxt);	if (NL(*s))		s++;	while (s && s > cntxt->fdin->buf && !NL(*s))		s--;	if (NL(*s))		s++;	return s;}long position(Client cntxt){   	str s = lastline(cntxt);	return (long) (CURRENT(cntxt) - s);}#if HAVE_STRTOLL && !HAVE_DECL_STRTOLLextern long long strtoll(const char *, char **, int);#endif@-Upon encountering an error we skip to the nearest semicolon,or comment terminated by a new line@cINLINE void skipToEnd(Client cntxt){	char c;	while( (c= *CURRENT(cntxt)) != ';' && c) nextChar(cntxt);	if(c) nextChar(cntxt);}@-The lexical analyser for constants is a little more complex.Aside from getting its length, we need an indication of its 
type.The constant structure is initialized for later use.@cint cstToken(Client cntxt, ValPtr cst){   	int i = 0;	long long l;	int hex=0;	str s = CURRENT(cntxt);	cst->vtype = TYPE_int;	switch(*s){	case '"':		cst->vtype= TYPE_str;		i= stringLength(cntxt);		cst->val.sval =strCopy(cntxt, i);		cst->len= strlen(cst->val.sval);		return i;	case '\'':		return charCst(cntxt,cst);	case '-':		i++;		s++;	case '0':	if( (s[1] == 'x' || s[1] == 'X')){		/* deal with hex */		hex= TRUE;		i+=2;		s+=2;	}	case '1': case '2': case '3': case '4': case '5':	case '6': case '7': case '8': case '9':	if( hex)		while (isdigit((int)*s) || isalpha((int)*s) ) {			if( !((tolower(*s) >= 'a' && tolower(*s) <= 'f')				|| isdigit((int)*s) )  )				break;			i++;			s++;		}	else		while (isdigit((int)*s) ) {			i++;			s++;		}	if( hex) goto handleInts;	case '.':	if (*s == '.' && isdigit(*(s+1)) ) {		i++;		s++;		while (isdigit(*s)) {			i++;			s++;		}		cst->vtype = TYPE_flt;	}	if (*s == 'e' || *s == 'E') {		i++;		s++;		if (*s == '-' || *s == '+'){			i++;			s++;		}		cst->vtype = TYPE_dbl;		while (isdigit(*s)) {			i++;			s++;		}	}	if( cst->vtype == TYPE_flt) {		int len= i;		float *pval= 0;		fltFromStr(CURRENT(cntxt), &len, &pval);		cst->val.fval= *pval;		if( pval) GDKfree(pval);	}	if( cst->vtype == TYPE_dbl){		int len= i;		double *pval= 0;		dblFromStr(CURRENT(cntxt), &len, &pval);		cst->val.dval= *pval;		if( pval) GDKfree(pval);		if( cst->val.dval> FLT_MIN && cst->val.dval<= FLT_MAX ){			cst->vtype= TYPE_flt;			cst->val.fval = (flt) cst->val.dval;		}	}	if (*s == '@') {		cst->vtype = TYPE_oid;		errno = 0;		cst->val.lval = strtoll(CURRENT(cntxt),NULL,0);		if( cst->val.lval <0 || errno== ERANGE )			cst->val.oval= oid_nil;		else			cst->val.oval= (oid) cst->val.lval;		i++;		s++;		while (isdigit(*s)) {			i++;			s++;		}		return i;	} 	if (*s == 'L') {		if( cst->vtype == TYPE_int)			cst->vtype = TYPE_lng;		if( cst->vtype == TYPE_flt)			cst->vtype = TYPE_dbl;		i++;		s++;		if (*s == 'L') {			i++;			s++;		}		if( cst->vtype 
== TYPE_dbl ){			int len= i;			double *pval= 0;			dblFromStr(CURRENT(cntxt), &len, &pval);			cst->val.dval= *pval;			if( pval) GDKfree(pval);		} else {			int len= i;			lng *pval= 0;			lngFromStr(CURRENT(cntxt), &len, &pval);			cst->val.lval= *pval;			if( pval) GDKfree(pval);		}		return i;	}handleInts:	if( cst->vtype == TYPE_int || cst->vtype == TYPE_lng){		l = strtoll(CURRENT(cntxt),NULL,0);		if( l> INT_MIN && l<= INT_MAX ){			cst->vtype= TYPE_int;			cst->val.ival = (int)l;		} else {			cst->vtype= TYPE_lng;			cst->val.lval = l;		}	}	return i;	case 'f':	if( strncmp(s,"false",5)==0 && !isalnum((int)*(s+5)) &&			*(s+5)!= '_'){		cst->vtype = TYPE_bit;		cst->val.cval[0] = 0;		cst->len = 1;		return 5;	}	return 0;	case 't':	if( strncmp(s,"true",4)==0 && !isalnum((int)*(s+4)) &&			*(s+4)!= '_'){		cst->vtype = TYPE_bit;		cst->val.cval[0] = 1;		cst->len = 1;		return 4;	}	return 0;	case 'n':	if( strncmp(s,"nil",3)==0 && !isalnum((int)*(s+3)) &&			*(s+3)!= '_'){		cst->vtype = TYPE_void;		cst->val.oval = oid_nil;		return 3;	}	}	return 0;}#define cstCopy(C,I)  idCopy(C,I)@- Type qualifierTypes are recognized as identifiers preceded by a colon.They may be extended with a property listand 'any' types can be marked with an alias.The type qualifier parser returns the encoded type as a short 32-bit integer. The syntax structure is@multitable @columnfractions 0.15 0.8@item typeQualifier  @tab : typeName propQualifier @item typeName@tab : scalarType | collectionType | anyType@item scalarType@tab :  ':' @sc{ identifier} @item collectionType @tab :  ':' @sc{ bat} ['[' col ',' col ']'] @item anyType@tab :  ':' @sc{ any} [typeAlias] @item col@tab :  scalarType | anyType
mal_parser.mx - 源码说明

本页面展示了「一个内存数据库的源代码这是服务器端还有客户端」中的 mal_parser.mx 源码文件，采用 MX 编程语言编写，共 1,808 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与内存数据库相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?