📄 parser.c
字号:
t = s; cc->sbuffer = str; } } else { /* new continuation... */ cc = (pcont_t *)sexp_malloc(sizeof(pcont_t)); assert(cc != NULL); cc->mode = mode; /* allocate atom buffer */ cc->val = val = (char *)sexp_malloc(sizeof(char)*sexp_val_start_size); assert(val != NULL); cc->val_used = val_used = 0; cc->val_allocated = val_allocated = sexp_val_start_size; vcur = val; /* allocate stack */ cc->stack = stack = make_stack(); cc->bindata = NULL; cc->binread = cc->binexpected = 0; /* t is temp pointer into s for current position */ s = str; t = s; cc->sbuffer = str; } bufEnd = cc->sbuffer+len; /* guard for loop - see end of loop for info. Put it out here in the event that we're restoring state from a continuation and need to check before we start up. */ if (state != 15 && t[0] == '\0') keepgoing = 0; /*==================*/ /* main parser loop */ /*==================*/ while (keepgoing == 1 && t != bufEnd) {#ifdef _DEBUG_ printf("PARSER: STATE=%d CURCHAR=%c (0x%lx)\n",state,t[0],(unsigned long)t); printf("MODE: %d\n",mode); printf(" VAL_ALLOCATED=%lu VAL_USED=%lu\n",val_allocated,val_used); fsm_iterations++;#endif /* based on the current state in the FSM, do something */ switch (state) { case 1: switch (t[0]) { /* space,tab,CR,LF considered white space */ case '\n': case ' ': case '\t': case '\r': t++; break; /* semicolon starts a comment that extends until a \n is encountered. */ case ';': t++; state = 11; break; /* enter state 2 for open paren */ case '(': state = 2; t++; break; /* enter state 3 for close paran */ case ')': state = 3; break; /* begin quoted string - enter state 5 */ case '\"': state = 5; /* set cur pointer to beginning of val buffer */ vcur = val; t++; break; /* single quote - enter state 7 */ case '\'': state = 7; t++; break; /* other characters are assumed to be atom parts */ default: /* set cur pointer to beginning of val buffer */ vcur = val; /** NOTE: the following code originally required a transition to state 4 before processing the first atom character -- this required two iterations for the first character of each atom. merging this into here allows us to process what we already know to be a valid atom character before entering state 4. **/ vcur[0] = t[0]; if (t[0] == '\\') esc = 1; else esc = 0; val_used++; if (val_used == val_allocated) { val = (char *)sexp_realloc(val, val_allocated+sexp_val_grow_size, val_allocated); assert(val != NULL); vcur = val + val_used; val_allocated += sexp_val_grow_size; } else vcur++; /* if the atom starts with # and we're in inline binary mode, we need to go to state 12 to start checking for the #b# prefix. otherwise, if it's not a # or we're just in normal mode, proceed to state 4 as usual. */ if (t[0] == '#' && mode == PARSER_INLINE_BINARY) { state = 12; } else { state = 4; } t++; break; } break; case 2: /* open paren */ depth++; sx = sexp_t_allocate(); assert(sx!=NULL); elts++; sx->ty = SEXP_LIST; sx->next = NULL; sx->list = NULL; if (stack->height < 1) { data = pd_allocate(); assert(data!=NULL); data->fst = data->lst = sx; push (stack, data); } else { data = (parse_data_t *) top_data (stack); if (data->lst != NULL) data->lst->next = sx; else data->fst = sx; data->lst = sx; } data = pd_allocate(); assert(data!=NULL); data->fst = data->lst = NULL; push (stack, data); state = 1; break; case 3: /** close paren **/#ifdef _DEBUG_ if (depth > maxdepth) maxdepth = depth;#endif /* _DEBUG_ */ /* check for close parens that were never opened. */ if (depth == 0) { fprintf(stderr,"Badly formed s-expression. Parser exiting.\n"); cc->bindata = bindata; cc->binread = binread; cc->binexpected = binexpected; cc->val = val; cc->mode = mode; cc->val_used = val_used; cc->val_allocated = val_allocated; cc->vcur = vcur; cc->lastPos = t; cc->depth = depth; cc->qdepth = qdepth; cc->state = 1; cc->stack = stack; cc->esc = 0; cc->last_sexp = NULL; cc->error = 1;#ifdef _DEBUG_ fprintf(stderr,"State Machine Iterations: %lu\nMaxdepth: %lu\n", fsm_iterations,maxdepth);#endif /* debug */ return cc; } t++; depth--; lvl = pop (stack); data = (parse_data_t *) lvl->data; sx = data->fst; pd_deallocate(data); lvl->data = NULL; if (stack->top != NULL) { data = (parse_data_t *) top_data (stack); data->lst->list = sx; } else { fprintf (stderr, "cparse_sexp: stack->top is null (state=%d).\n", state); } state = 1; /** if depth = 0 then we finished a sexpr, and we return **/ if (depth == 0) { cc->bindata = bindata; cc->binread = binread; cc->binexpected = binexpected; cc->error = 0; cc->mode = mode; cc->val = val; cc->val_allocated = val_allocated; cc->val_used = val_used; cc->vcur = vcur; cc->lastPos = t; cc->depth = depth; cc->qdepth = qdepth; cc->state = 1; cc->stack = stack; cc->esc = 0; while (stack->top != NULL) { lvl = pop (stack); data = (parse_data_t *) lvl->data; sx = data->fst; pd_deallocate(data); lvl->data = NULL; } cc->last_sexp = sx;#ifdef _DEBUG_ fprintf(stderr,"State Machine Iterations: %lu\nMaxdepth: %lu\n", fsm_iterations,maxdepth);#endif /* debug */ return cc; } break; case 4: /** parsing atom **/ if (esc == 1 && (t[0] == '\"' || t[0] == '(' || t[0] == ')' || t[0] == '\'' || t[0] == '\\')) { vcur--; /* back up to overwrite the \ */ vcur[0] = t[0]; vcur++; t++; esc = 0; break; } /* look at an ascii table - these ranges are the non-whitespace, non paren and quote characters that are legal in atoms */ if (!((t[0] >= '*' && t[0] <= '~') || ((unsigned char)(t[0]) > 127) || (t[0] == '!') || (t[0] >= '#' && t[0] <= '&'))) { vcur[0] = '\0'; val_used++; sx = sexp_t_allocate(); assert(sx!=NULL); elts++; sx->ty = SEXP_VALUE; sx->val = val; sx->val_allocated = val_allocated; sx->val_used = val_used; sx->next = NULL; if (squoted != 0) sx->aty = SEXP_SQUOTE; else sx->aty = SEXP_BASIC; val = (char *)sexp_malloc(sizeof(char)*sexp_val_start_size); assert(val != NULL); val_allocated = sexp_val_start_size; val_used = 0; vcur = val; if (!empty_stack (stack)) { data = (parse_data_t *) top_data (stack); if (data->fst == NULL) { data->fst = data->lst = sx; } else { data->lst->next = sx; data->lst = sx; } } else { /* looks like this expression was just a basic atom - so return it. */ cc->bindata = bindata; cc->binread = binread; cc->binexpected = binexpected; cc->mode = mode; cc->error = 0; cc->val = val; cc->val_used = val_used; cc->val_allocated = val_allocated; cc->vcur = vcur; cc->squoted = 0; cc->lastPos = t; cc->depth = depth; cc->qdepth = qdepth; cc->state = 1; cc->stack = stack; cc->esc = 0; cc->last_sexp = sx; assert(sx != NULL);#ifdef _DEBUG_ fprintf(stderr,"STATE 4: CURCHAR=%c\n",vcur[0]);#endif /* _DEBUG_ */ return cc; } switch (t[0]) { case ' ': case '\t': case '\n': case '\r': /** NOTE: we know whitespace following atom, so spin ahead one and let state 1 do what it needs to for the next character. **/ state = 1; t++; squoted = 0; break; case ')': squoted = 0; state = 3; break; default: squoted = 0; state = 1; } } else { vcur[0] = t[0]; if (t[0] == '\\') esc = 1; else esc = 0; val_used++; if (val_used == val_allocated) { val = (char *)sexp_realloc(val, val_allocated+sexp_val_grow_size, val_allocated); assert(val != NULL); vcur = val + val_used; val_allocated += sexp_val_grow_size; } else vcur++; t++; } break; case 5: if (esc == 1 && (t[0] == '\"' || t[0] == '\'' || t[0] == '(' || t[0] == ')' || t[0] == '\\')) { vcur--; vcur[0] = t[0]; vcur++; /** NO NEED TO UPDATE VAL COUNTS **/ t++; esc = 0; } if (t[0] == '\"') { state = 6; if (squoted == 1) { vcur[0] = '\"'; val_used++; if (val_used == val_allocated) { val = (char *)sexp_realloc(val, val_allocated+sexp_val_grow_size, val_allocated); assert(val != NULL); vcur = val + val_used; val_allocated += sexp_val_grow_size; } else vcur++; } vcur[0] = '\0'; val_used++; sx = sexp_t_allocate(); assert(sx!=NULL); elts++; sx->ty = SEXP_VALUE; sx->val = val; sx->val_used = val_used; sx->val_allocated = val_allocated; sx->next = NULL; if (squoted == 1) { sx->aty = SEXP_SQUOTE; squoted = 0; } else sx->aty = SEXP_DQUOTE; val = (char *)sexp_malloc(sizeof(char)*sexp_val_start_size); assert(val != NULL); val_allocated = sexp_val_start_size; val_used = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -