📄 parser.c
字号:
squoted = cc->squoted;
val = cc->val;
vcur = cc->vcur;
state = cc->state;
depth = cc->depth;
qdepth = cc->qdepth;
stack = cc->stack;
esc = cc->esc;
mode = cc->mode;
s = str;
if (cc->lastPos != NULL)
t = cc->lastPos;
else {
t = s;
cc->sbuffer = str;
}
} else {
/* new continuation... */
cc = (pcont_t *)malloc(sizeof(pcont_t));
assert(cc != NULL);
cc->mode = mode;
/* allocate atom buffer */
cc->val = val = (char *)malloc(sizeof(char)*sexp_val_start_size);
assert(val != NULL);
cc->val_used = val_used = 0;
cc->val_allocated = val_allocated = sexp_val_start_size;
vcur = val;
/* allocate stack */
cc->stack = stack = make_stack();
cc->bindata = NULL;
cc->binread = cc->binexpected = 0;
/* t is temp pointer into s for current position */
s = str;
t = s;
cc->sbuffer = str;
}
bufEnd = cc->sbuffer+len;
/* guard for loop - see end of loop for info. Put it out here in the
event that we're restoring state from a continuation and need to
check before we start up. */
if (state != 15 && t[0] == '\0') keepgoing = 0;
/*==================*/
/* main parser loop */
/*==================*/
while (keepgoing == 1 && t != bufEnd)
{
#ifdef _DEBUG_
printf("PARSER: STATE=%d CURCHAR=%c (0x%x)\n",state,t[0],t);
printf("MODE: %d\n",mode);
printf(" VAL_ALLOCATED=%d VAL_USED=%d\n",val_allocated,val_used);
fsm_iterations++;
#endif
/* based on the current state in the FSM, do something */
switch (state)
{
case 1:
switch (t[0])
{
/* space,tab,CR,LF considered white space */
case '\n':
case ' ':
case '\t':
case '\r':
t++;
break;
/* semicolon starts a comment that extends until a \n is
encountered. */
case ';':
t++;
state = 11;
break;
/* enter state 2 for open paren */
case '(':
state = 2;
t++;
break;
/* enter state 3 for close paren */
case ')':
state = 3;
break;
/* begin quoted string - enter state 5 */
case '\"':
state = 5;
/* set cur pointer to beginning of val buffer */
vcur = val;
t++;
break;
/* single quote - enter state 7 */
case '\'':
state = 7;
t++;
break;
/* other characters are assumed to be atom parts */
default:
/* set cur pointer to beginning of val buffer */
vcur = val;
/** NOTE: the following code originally required a transition
to state 4 before processing the first atom character --
this required two iterations for the first character
of each atom. merging this into here allows us to process
what we already know to be a valid atom character before
entering state 4. **/
vcur[0] = t[0];
if (t[0] == '\\') esc = 1;
else esc = 0;
val_used++;
if (val_used == val_allocated) {
val = (char *)realloc(val,val_allocated+sexp_val_grow_size);
assert(val != NULL);
vcur = val + val_used;
val_allocated += sexp_val_grow_size;
} else vcur++;
/* if the atom starts with # and we're in inline
binary mode, we need to go to state 12 to start
checking for the #b# prefix. otherwise,
if it's not a # or we're just in normal mode,
proceed to state 4 as usual. */
if (t[0] == '#' && mode == PARSER_INLINE_BINARY) {
state = 12;
} else {
state = 4;
}
t++;
break;
}
break;
case 2:
/* open paren */
depth++;
sx = sexp_t_allocate();
assert(sx!=NULL);
elts++;
sx->ty = SEXP_LIST;
sx->next = NULL;
sx->list = NULL;
if (stack->height < 1)
{
data = pd_allocate();
assert(data!=NULL);
data->fst = data->lst = sx;
push (stack, data);
}
else
{
data = (parse_data_t *) top_data (stack);
if (data->lst != NULL)
data->lst->next = sx;
else
data->fst = sx;
data->lst = sx;
}
data = pd_allocate();
assert(data!=NULL);
data->fst = data->lst = NULL;
push (stack, data);
state = 1;
break;
case 3:
/** close paren **/
#ifdef _DEBUG_
if (depth > maxdepth) maxdepth = depth;
#endif /* _DEBUG_ */
/* check for close parens that were never opened. */
if (depth == 0) {
fprintf(stderr,"Badly formed s-expression. Parser exiting.\n");
cc->val = val;
cc->mode = mode;
cc->val_used = val_used;
cc->val_allocated = val_allocated;
cc->vcur = vcur;
cc->lastPos = t;
cc->depth = depth;
cc->qdepth = qdepth;
cc->state = 1;
cc->stack = stack;
cc->esc = 0;
cc->last_sexp = NULL;
cc->error = 1;
#ifdef _DEBUG_
fprintf(stderr,"State Machine Iterations: %d\nMaxdepth: %d\n",
fsm_iterations,maxdepth);
#endif /* debug */
return cc;
}
t++;
depth--;
lvl = pop (stack);
data = (parse_data_t *) lvl->data;
sx = data->fst;
/* free (data); */
pd_deallocate(data);
lvl->data = NULL;
if (stack->top != NULL)
{
data = (parse_data_t *) top_data (stack);
data->lst->list = sx;
}
else
{
fprintf (stderr, "Hmmm. Stack->top is null.\n");
}
state = 1;
/** if depth = 0 then we finished a sexpr, and we return **/
if (depth == 0) {
cc->error = 0;
cc->mode = mode;
cc->val = val;
cc->val_allocated = val_allocated;
cc->val_used = val_used;
cc->vcur = vcur;
cc->lastPos = t;
cc->depth = depth;
cc->qdepth = qdepth;
cc->state = 1;
cc->stack = stack;
cc->esc = 0;
while (stack->top != NULL)
{
lvl = pop (stack);
data = (parse_data_t *) lvl->data;
sx = data->fst;
/* free (data); */
pd_deallocate(data);
lvl->data = NULL;
}
cc->last_sexp = sx;
#ifdef _DEBUG_
fprintf(stderr,"State Machine Iterations: %d\nMaxdepth: %d\n",
fsm_iterations,maxdepth);
#endif /* debug */
return cc;
}
break;
case 4: /** parsing atom **/
if (esc == 1 && (t[0] == '\"' || t[0] == '(' ||
t[0] == ')' || t[0] == '\'' ||
t[0] == '\\')) {
vcur--; /* back up to overwrite the \ */
vcur[0] = t[0];
vcur++;
t++;
esc = 0;
break;
}
/* look at an ASCII table - these ranges cover the non-whitespace,
non-paren, non-quote characters that are legal in atoms */
if (!((t[0] >= '*' && t[0] <= '~') ||
((unsigned char)(t[0]) > 127) ||
(t[0] == '!') ||
(t[0] >= '#' && t[0] <= '&')))
{
vcur[0] = '\0';
val_used++;
sx = sexp_t_allocate();
assert(sx!=NULL);
elts++;
sx->ty = SEXP_VALUE;
sx->val = val;
sx->val_allocated = val_allocated;
sx->val_used = val_used;
sx->next = NULL;
if (squoted != 0)
sx->aty = SEXP_SQUOTE;
else
sx->aty = SEXP_BASIC;
val = (char *)malloc(sizeof(char)*sexp_val_start_size);
assert(val != NULL);
val_allocated = sexp_val_start_size;
val_used = 0;
vcur = val;
if (!empty_stack (stack))
{
data = (parse_data_t *) top_data (stack);
if (data->fst == NULL)
{
data->fst = data->lst = sx;
}
else
{
data->lst->next = sx;
data->lst = sx;
}
}
else
{
/* looks like this expression was just a basic atom - so
return it. */
cc->mode = mode;
cc->error = 0;
cc->val = val;
cc->val_used = val_used;
cc->val_allocated = val_allocated;
cc->vcur = vcur;
cc->squoted = 0;
cc->lastPos = t;
cc->depth = depth;
cc->qdepth = qdepth;
cc->state = 1;
cc->stack = stack;
cc->esc = 0;
cc->last_sexp = sx;
assert(sx != NULL);
#ifdef _DEBUG_
fprintf(stderr,"STATE 4: CURCHAR=%c\n",vcur);
#endif /* _DEBUG_ */
return cc;
}
switch (t[0]) {
case ' ':
case '\t':
case '\n':
case '\r':
/** NOTE: we know whitespace follows the atom, so skip ahead
one character and let state 1 do what it needs to for the
next character. **/
state = 1;
t++;
squoted = 0;
break;
case ')':
squoted = 0;
state = 3;
break;
default:
squoted = 0;
state = 1;
}
}
else
{
vcur[0] = t[0];
if (t[0] == '\\') esc = 1;
else esc = 0;
val_used++;
if (val_used == val_allocated) {
val = (char *)realloc(val,val_allocated+sexp_val_grow_size);
assert(val != NULL);
vcur = val + val_used;
val_allocated += sexp_val_grow_size;
} else vcur++;
t++;
}
break;
case 5:
if (esc == 1 && (t[0] == '\"' ||
t[0] == '\'' ||
t[0] == '(' ||
t[0] == ')' ||
t[0] == '\\')) {
vcur--;
vcur[0] = t[0];
vcur++;
/** NO NEED TO UPDATE VAL COUNTS **/
t++;
esc = 0;
}
if (t[0] == '\"')
{
state = 6;
if (squoted == 1) {
vcur[0] = '\"';
val_used++;
if (val_used == val_allocated) {
val = (char *)realloc(val,val_allocated+sexp_val_grow_size);
assert(val != NULL);
vcur = val + val_used;
val_allocated += sexp_val_grow_size;
} else vcur++;
}
vcur[0] = '\0';
val_used++;
sx = sexp_t_allocate();
assert(sx!=NULL);
elts++;
sx->ty = SEXP_VALUE;
sx->val = val;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -