📄 rx.c
字号:
struct rx_hash * memo; void * car; struct rx_se_list * cdr;#endif{ long hash = (long)car ^ (long)cdr; struct rx_se_list template; template.car = car; template.cdr = cdr; { struct rx_hash_item * it = rx_hash_store (memo, hash, (void *)&template, &se_list_hash_rules); if (!it) return 0; if (it->data == (void *)&template) { struct rx_se_list * consed; consed = (struct rx_se_list *) malloc (sizeof (*consed)); *consed = template; it->data = (void *)consed; } return (struct rx_se_list *)it->data; }} #ifdef __STDC__static struct rx_se_list *hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)#elsestatic struct rx_se_list *hash_se_prog (rx, memo, prog) struct rx * rx; struct rx_hash * memo; struct rx_se_list * prog;#endif{ struct rx_se_list * answer = 0; while (prog) { answer = hash_cons_se_prog (rx, memo, prog->car, answer); if (!answer) return 0; prog = prog->cdr; } return answer;}#ifdef __STDC__static int nfa_set_cmp (void * va, void * vb)#elsestatic int nfa_set_cmp (va, vb) void * va; void * vb;#endif{ struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va; struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb; return ((va == vb) ? 0 : (!va ? -1 : (!vb ? 1 : (a->car->id < b->car->id ? 1 : (a->car->id > b->car->id ? -1 : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));}#ifdef __STDC__static int nfa_set_equal (void * va, void * vb)#elsestatic int nfa_set_equal (va, vb) void * va; void * vb;#endif{ return !nfa_set_cmp (va, vb);}static struct rx_hash_rules nfa_set_hash_rules ={ nfa_set_equal, compiler_hash_alloc, compiler_free_hash, compiler_hash_item_alloc, compiler_free_hash_item};#ifdef __STDC__static struct rx_nfa_state_set * nfa_set_cons (struct rx * rx, struct rx_hash * memo, struct rx_nfa_state * state, struct rx_nfa_state_set * set)#elsestatic struct rx_nfa_state_set * nfa_set_cons (rx, memo, state, set) struct rx * rx; struct rx_hash * memo; struct rx_nfa_state * state; struct rx_nfa_state_set * set;#endif{ struct rx_nfa_state_set template; struct rx_hash_item * node; template.car = state; template.cdr = set; node = rx_hash_store (memo, (((long)state) >> 8) ^ (long)set, &template, &nfa_set_hash_rules); if (!node) return 0; if (node->data == &template) { struct rx_nfa_state_set * l; l = (struct rx_nfa_state_set *) malloc (sizeof (*l)); node->data = (void *) l; if (!l) return 0; *l = template; } return (struct rx_nfa_state_set *)node->data;}#ifdef __STDC__static struct rx_nfa_state_set * nfa_set_enjoin (struct rx * rx, struct rx_hash * memo, struct rx_nfa_state * state, struct rx_nfa_state_set * set)#elsestatic struct rx_nfa_state_set * nfa_set_enjoin (rx, memo, state, set) struct rx * rx; struct rx_hash * memo; struct rx_nfa_state * state; struct rx_nfa_state_set * set;#endif{ if (!set || state->id < set->car->id) return nfa_set_cons (rx, memo, state, set); if (state->id == set->car->id) return set; else { struct rx_nfa_state_set * newcdr = nfa_set_enjoin (rx, memo, state, set->cdr); if (newcdr != set->cdr) set = nfa_set_cons (rx, memo, set->car, newcdr); return set; }}/* This page: computing epsilon closures. The closures aren't total. * Each node's closures are partitioned according to the side effects entailed * along the epsilon edges. Return true on success. */ struct eclose_frame{ struct rx_se_list *prog_backwards;};#ifdef __STDC__static int eclose_node (struct rx *rx, struct rx_nfa_state *outnode, struct rx_nfa_state *node, struct eclose_frame *frame)#elsestatic int eclose_node (rx, outnode, node, frame) struct rx *rx; struct rx_nfa_state *outnode; struct rx_nfa_state *node; struct eclose_frame *frame;#endif{ struct rx_nfa_edge *e = node->edges; /* For each node, we follow all epsilon paths to build the closure. * The closure omits nodes that have only epsilon edges. * The closure is split into partial closures -- all the states in * a partial closure are reached by crossing the same list of * of side effects (though not necessarily the same path). */ if (node->mark) return 1; node->mark = 1; if (node->id >= 0 || node->is_final) { struct rx_possible_future **ec; struct rx_se_list * prog_in_order = ((struct rx_se_list *)hash_se_prog (rx, &rx->se_list_memo, frame->prog_backwards)); int cmp; ec = &outnode->futures; while (*ec) { cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order); if (cmp <= 0) break; ec = &(*ec)->next; } if (!*ec || (cmp < 0)) { struct rx_possible_future * saved = *ec; *ec = rx_possible_future (rx, prog_in_order); (*ec)->next = saved; if (!*ec) return 0; } if (node->id >= 0) { (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo, node, (*ec)->destset); if (!(*ec)->destset) return 0; } } while (e) { switch (e->type) { case ne_epsilon: if (!eclose_node (rx, outnode, e->dest, frame)) return 0; break; case ne_side_effect: { frame->prog_backwards = side_effect_cons (rx, e->params.side_effect, frame->prog_backwards); if (!frame->prog_backwards) return 0; if (!eclose_node (rx, outnode, e->dest, frame)) return 0; { struct rx_se_list * dying = frame->prog_backwards; frame->prog_backwards = frame->prog_backwards->cdr; free ((char *)dying); } break; } default: break; } e = e->next; } node->mark = 0; return 1;}#ifdef __STDC__RX_DECL int rx_eclose_nfa (struct rx *rx)#elseRX_DECL int rx_eclose_nfa (rx) struct rx *rx;#endif{ struct rx_nfa_state *n = rx->nfa_states; struct eclose_frame frame; static int rx_id = 0; frame.prog_backwards = 0; rx->rx_id = rx_id++; bzero (&rx->se_list_memo, sizeof (rx->se_list_memo)); bzero (&rx->set_list_memo, sizeof (rx->set_list_memo)); while (n) { n->futures = 0; if (n->eclosure_needed && !eclose_node (rx, n, n, &frame)) return 0; /* clear_marks (rx); */ n = n->next; } return 1;}/* This deletes epsilon edges from an NFA. After running eclose_node, * we have no more need for these edges. They are removed to simplify * further operations on the NFA. */#ifdef __STDC__RX_DECL void rx_delete_epsilon_transitions (struct rx *rx)#elseRX_DECL void rx_delete_epsilon_transitions (rx) struct rx *rx;#endif{ struct rx_nfa_state *n = rx->nfa_states; struct rx_nfa_edge **e; while (n) { e = &n->edges; while (*e) { struct rx_nfa_edge *t; switch ((*e)->type) { case ne_epsilon: case ne_side_effect: t = *e; *e = t->next; rx_free_nfa_edge (t); break; default: e = &(*e)->next; break; } } n = n->next; }}/* This page: storing the nfa in a contiguous region of memory for * subsequent conversion to a super-nfa. *//* This is for qsort on an array of nfa_states. The order * is based on state ids and goes * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0) * This way, positive ids double as array indices. */#ifdef __STDC__static int nfacmp (void * va, void * vb)#elsestatic int nfacmp (va, vb) void * va; void * vb;#endif{ struct rx_nfa_state **a = (struct rx_nfa_state **)va; struct rx_nfa_state **b = (struct rx_nfa_state **)vb; return (*a == *b /* &&&& 3.18 */ ? 0 : (((*a)->id < 0) == ((*b)->id < 0) ? (((*a)->id < (*b)->id) ? -1 : 1) : (((*a)->id < 0) ? 1 : -1)));}#ifdef __STDC__static int count_hash_nodes (struct rx_hash * st)#elsestatic int count_hash_nodes (st) struct rx_hash * st;#endif{ int x; int count = 0; for (x = 0; x < 13; ++x) count += ((st->children[x]) ? count_hash_nodes (st->children[x]) : st->bucket_size[x]); return count;}#ifdef __STDC__static void se_memo_freer (struct rx_hash_item * node)#elsestatic void se_memo_freer (node) struct rx_hash_item * node;#endif{ free ((char *)node->data);}#ifdef __STDC__static void nfa_set_freer (struct rx_hash_item * node)#elsestatic void nfa_set_freer (node) struct rx_hash_item * node;#endif{ free ((char *)node->data);}/* This copies an entire NFA into a single malloced block of memory. * Mostly this is for compatability with regex.c, though it is convenient * to have the nfa nodes in an array. */#ifdef __STDC__RX_DECL int rx_compactify_nfa (struct rx *rx, void **mem, unsigned long *size)#elseRX_DECL int rx_compactify_nfa (rx, mem, size) struct rx *rx; void **mem; unsigned long *size;#endif{ int total_nodec; struct rx_nfa_state *n; int edgec = 0; int eclosec = 0; int se_list_consc = count_hash_nodes (&rx->se_list_memo); int nfa_setc = count_hash_nodes (&rx->set_list_memo); unsigned long total_size; /* This takes place in two stages. First, the total size of the * nfa is computed, then structures are copied. */ n = rx->nfa_states; total_nodec = 0; while (n) { struct rx_nfa_edge *e = n->edges; struct rx_possible_future *ec = n->futures; ++total_nodec; while (e) { ++edgec; e = e->next; } while (ec) { ++eclosec; ec = ec->next; } n = n->next; } total_size = (total_nodec * sizeof (struct rx_nfa_state) + edgec * rx_sizeof_bitset (rx->local_cset_size) + edgec * sizeof (struct rx_nfa_edge) + nfa_setc * sizeof (struct rx_nfa_state_set) + eclosec * sizeof (struct rx_possible_future) + se_list_consc * sizeof (struct rx_se_list) + rx->reserved); if (total_size > *size) { *mem = remalloc (*mem, total_size); if (*mem) *size = total_size; else return 0; } /* Now we've allocated the memory; this copies the NFA. */ { static struct rx_nfa_state **scratch = 0; static int scratch_alloc = 0; struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem; struct rx_nfa_state *new_state = state_base; struct rx_nfa_edge *new_edge = (struct rx_nfa_edge *) ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state)); struct rx_se_list * new_se_list = (struct rx_se_list *) ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge)); struct rx_possible_future *new_close = ((struct rx_possible_future *) ((char *) new_se_list + se_list_consc * sizeof (struct rx_se_list))); struct rx_nfa_state_set * new_nfa_set = ((struct rx_nfa_state_set *) ((char *)new_close + eclosec * sizeof (struct rx_possible_future))); char *new_bitset = ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set)); int x; struct rx_nfa_state *n; if (scratch_alloc < total_nodec) { scratch = ((struct rx_nfa_state **) remalloc (scratch, total_nodec * sizeof (*scratch))); if (scratch) scratch_alloc = total_nodec; else { scratch_alloc = 0; return 0; } } for (x = 0, n = rx->nfa_states; n; n = n->next) scratch[x++] = n; qsort (scratch, total_nodec, sizeof (struct rx_nfa_state *), (int (*)())nfacmp); for (x = 0; x < total_nodec; ++x) { struct rx_possible_future *eclose = scratch[x]->futures; struct rx_nfa_edge *edge = scratch[x]->edges; struct rx_nfa_state *cn = new_state++; cn->futures = 0; cn->edges = 0; cn->next = (x == total_nodec - 1) ? 0 : (cn + 1); cn->id = scratch[x]->id; cn->is_final = scratch[x]->is_final; cn->is_start = scratch[x]->is_start; cn->mark = 0; while (edge) { int indx = (edge->dest->id < 0 ? (total_nodec + edge->dest->id) : edge->dest->id); struct rx_nfa_edge *e = new_edge++; rx_Bitset cset = (rx_Bitset) new_bitset; new_bitset += rx_sizeof_bitset (rx->local_cset_size); rx_bitset_null (rx->local_cset_size, cset); rx_bitset_union (rx->local_cset_size, cset, edge->params.cset); e->next = cn->edges; cn->edges = e; e->type = edge->type; e->dest = state_base + indx; e->params.cset = cset; edge = edge->next; } while (eclose) { struct rx_possible_future *ec = new_close++; struct rx_hash_item * sp; struct rx_se_list ** sepos; struct rx_se_list * sesrc; struct rx_nfa_state_set * destlst; struct rx_nfa_state_set ** destpos; ec->next = cn->futures; cn->futures = ec; for (sepos = &ec->effects, sesrc = eclose->effects; sesrc; sesrc = sesrc->cdr, sepos = &(*sepos)->cdr) { sp = rx_hash_find (&rx->se_list_memo, (long)sesrc->car ^ (long)sesrc->cdr, sesrc, &se_list_hash_rules); if (sp->binding) { sesrc = (struct rx_se_list *)sp->binding; break; } *new_se_list = *sesrc; sp->binding = (void *)new_se_list; *sepos = new_se_list; ++new_se_list; } *sepos = sesrc; for (destpos = &ec->destset, destlst = eclose->destset; destlst; destpos = &(*destpos)->cdr, destlst = destlst->cdr) { sp = rx_hash_find (&rx->set_list_memo, ((((long)destlst->car) >> 8) ^ (long)destlst->cdr), destlst, &nfa_set_hash_rules); if (sp->binding) { destlst = (struct rx_nfa_state_set *)sp->binding; break; } *new_nfa_set = *destlst; new_nfa_set->car = state_base + destlst->car->id; sp->binding = (void *)new_nfa_set; *destpos = new_nfa_set; ++new_nfa_set; } *destpos = destlst; eclose = eclose->next; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -