📄 dfa.c
字号:
max_char = parse_info->charset->size; pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1)); tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos); i = add_DFA_state (dfas, &tran_set, &dfa_from); assert (i); dfa_from->rule_no = 0; poset = parse_info->poset; followpos = parse_info->followpos; while ((dfa_from = get_DFA_state (dfas))) { pos_i = pos; j = i = 0; for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next) if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char) *pos_i++ = tran_set->value; else if (c < 0) { if (i == 0 || c > i) i = c; c = posar[tran_set->value]->u.ch[1]; if (j == 0 || c > j) j = c; } *pos_i = -1; dfa_from->rule_no = -i; dfa_from->rule_nno = -j; for (char_1 = 0; char_1 <= max_char; char_1++) { char_0 = max_char+1; for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) if (posar[i]->u.ch[1] >= char_1 && (c=posar[i]->u.ch[0]) < char_0) { if (c < char_1) char_0 = char_1; else char_0 = c; } if (char_0 > max_char) break; char_1 = max_char; tran_set = mk_Set (poset); for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) { if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1) char_1 = c - 1; /* forward chunk */ else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1) char_1 = c; /* backward chunk */ if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0) tran_set = union_Set (poset, tran_set, followpos[i]); } if (tran_set) { add_DFA_state (dfas, &tran_set, &dfa_to); add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no); } } } ifree (pos); sort_DFA_states (dfas);}static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas){ struct DFA_state *s; struct DFA_tran *tran; int prev_no, i, c, no; for (no=0; no < dfas->no; ++no) { s = dfas->sortarray[no]; assert (s->no == no); printf ("DFA state %d", no); if (s->rule_no) printf ("#%d", s->rule_no); if (s->rule_nno) printf (" #%d", s->rule_nno); putchar (':'); pr_Set (parse_info->poset, s->set); prev_no = -1; for (i=s->tran_no, tran=s->trans; --i >= 0; tran++) { if (prev_no != tran->to) { if (prev_no != -1) printf ("]\n"); printf (" goto %d on [", tran->to); prev_no = tran->to; } for (c = tran->ch[0]; c <= tran->ch[1]; c++) printf ("%s", str_char(c)); } if (prev_no != -1) printf ("]\n"); putchar ('\n'); }}static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas){ long i, j; int k; printf ("%d/%d tree nodes used, %d bytes each\n", parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode)); k = inf_BSetHandle (parse_info->charset, &i, &j); printf ("%ld/%ld character sets, %d bytes each\n", i/k, j/k, k*sizeof(BSetWord)); k = inf_SetType (parse_info->poset, &i, &j); printf ("%ld/%ld poset items, %d bytes each\n", i, j, k); printf ("%d DFA states\n", dfas->no);}static void pr_followpos (struct DFA_parse *parse_info){ struct Tnode **posar = parse_info->posar; int i; printf ("\nfollowsets:\n"); for (i=1; i <= parse_info->position; i++) { printf ("%3d:", i); pr_Set (parse_info->poset, parse_info->followpos[i]); putchar ('\t'); if (posar[i]->u.ch[0] < 0) printf ("#%d", -posar[i]->u.ch[0]); else if (posar[i]->u.ch[1] > posar[i]->u.ch[0]) { putchar ('['); out_char (posar[i]->u.ch[0]); if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1) putchar ('-'); out_char (posar[i]->u.ch[1]); putchar (']'); } else out_char (posar[i]->u.ch[0]); putchar ('\n'); } putchar ('\n');}void dfa_parse_cmap_clean (struct DFA *d){ struct DFA_parse *dfa = d->parse_info; assert (dfa); if (!dfa->charMap) { dfa->charMapSize = 7; dfa->charMap = (int *) imalloc (dfa->charMapSize * sizeof(*dfa->charMap)); } dfa->charMap[0] = 0;}void dfa_parse_cmap_new (struct DFA *d, const int *cmap){ struct DFA_parse *dfa = d->parse_info; const int *cp; int size; assert (dfa); for (cp = cmap; *cp; cp += 2) ; size = cp - cmap + 1; if (size > dfa->charMapSize) { if (dfa->charMap) ifree (dfa->charMap); dfa->charMapSize = size; dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap)); } memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));}void dfa_parse_cmap_del (struct DFA *d, int from){ struct DFA_parse *dfa = d->parse_info; int *cc; assert (dfa); for (cc = dfa->charMap; *cc; cc += 2) if (*cc == from) { while ((cc[0] = cc[2])) { cc[1] = cc[3]; cc += 2; } break; }}void dfa_parse_cmap_add (struct DFA *d, int from, int to){ struct DFA_parse *dfa = d->parse_info; int *cc; int indx, size; assert (dfa); for (cc = dfa->charMap; *cc; cc += 2) if (*cc == from) { cc[1] = to; return ; } indx = cc - dfa->charMap; size = dfa->charMapSize; if (indx >= size) { int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap)); memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap)); ifree (dfa->charMap); dfa->charMap = cn; dfa->charMapSize = size+16; } dfa->charMap[indx] = from; dfa->charMap[indx+1] = to; dfa->charMap[indx+2] = 0;}void dfa_parse_cmap_thompson (struct DFA *d){ static int thompson_chars[] = { '*', L_CLOS0, '+', L_CLOS1, '|', L_ALT, '^', L_START, '$', L_END, '?', L_QUEST, '.', L_ANY, '(', L_LP, ')', L_RP, ' ', 0, '\t', 0, '\n', 0, 0 }; dfa_parse_cmap_new (d, thompson_chars);}static struct DFA_parse *dfa_parse_init (void){ struct DFA_parse *parse_info = (struct DFA_parse *) imalloc (sizeof (struct DFA_parse)); parse_info->charset = mk_BSetHandle (255, 20); parse_info->position = 0; parse_info->rule = 0; parse_info->root = NULL; parse_info->anyset = mk_BSet (&parse_info->charset); res_BSet (parse_info->charset, parse_info->anyset); com_BSet (parse_info->charset, parse_info->anyset); parse_info->use_Tnode = parse_info->max_Tnode = 0; parse_info->start = parse_info->end = NULL; parse_info->charMap = NULL; parse_info->charMapSize = 0; parse_info->cmap = NULL; return parse_info;}static void rm_dfa_parse (struct DFA_parse **dfap){ struct DFA_parse *parse_info = *dfap; assert (parse_info); term_Tnode(parse_info); rm_BSetHandle (&parse_info->charset); ifree (parse_info->charMap); ifree (parse_info); *dfap = NULL;}static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk){ struct DFA_states *dfas; struct DFA_parse *parse_info = dfap; assert (poset_chunk > 10); assert (dfap); parse_info->poset = mk_SetType (poset_chunk); init_pos(parse_info); init_followpos(parse_info); assert (parse_info->root); dfa_trav (parse_info, parse_info->root); if (debug_dfa_followpos) pr_followpos(parse_info); init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH)); mk_dfa_tran (parse_info, dfas); if (debug_dfa_tran) { pr_tran (parse_info, dfas); } if (dfa_verbose) pr_verbose (parse_info, dfas); del_pos(parse_info); del_followpos(parse_info); rm_SetType (parse_info->poset); return dfas;}struct DFA *dfa_init (void){ struct DFA *dfa; dfa = (struct DFA *) imalloc (sizeof(*dfa)); dfa->parse_info = dfa_parse_init (); dfa->state_info = NULL; dfa->states = NULL; dfa_parse_cmap_thompson (dfa); return dfa;}void dfa_set_cmap (struct DFA *dfa, void *vp, const char **(*cmap)(void *vp, const char **from, int len)){ dfa->parse_info->cmap = cmap; dfa->parse_info->cmap_data = vp;}int dfa_parse (struct DFA *dfa, const char **pattern){ struct Tnode *top; struct DFA_parse *parse_info; assert (dfa); assert (dfa->parse_info); parse_info = dfa->parse_info; if (!parse_info->cmap) { res_BSet (parse_info->charset, parse_info->anyset); add_BSet (parse_info->charset, parse_info->anyset, '\n'); com_BSet (parse_info->charset, parse_info->anyset); } do_parse (parse_info, pattern, &top); if (parse_info->err_code) return parse_info->err_code; if ( !dfa->parse_info->root ) dfa->parse_info->root = top; else { struct Tnode *n; n = mk_Tnode (parse_info); n->pos = OR; n->u.p[0] = dfa->parse_info->root; n->u.p[1] = top; dfa->parse_info->root = n; } return 0;}void dfa_mkstate (struct DFA *dfa){ assert (dfa); dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK); dfa->no_states = dfa->state_info->no; dfa->states = dfa->state_info->sortarray; rm_dfa_parse (&dfa->parse_info);}void dfa_delete (struct DFA **dfap){ assert (dfap); assert (*dfap); if ((*dfap)->parse_info) rm_dfa_parse (&(*dfap)->parse_info); if ((*dfap)->state_info) rm_DFA_states (&(*dfap)->state_info); ifree (*dfap); *dfap = NULL;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -