📄 ambig.c
字号:
/* ambig.c - Content model ambiguity checking. Written by James Clark (jjc@jclark.com).*//*This uses the construction in pp8-9 of [1], extended to deal with ANDgroups.Note that it is not correct for the purposes of ambiguity analysis tohandle AND groups by turning them into an OR group of SEQ groups(consider (a&b?)).We build an automaton for the entire content model by adding thefollowing case for AND:nullable(v) := nullable(left child) and nullable(right child)if nullable(right child) then for each x in last(left child) do follow(v,x) = follow(left child,x) U first(right child);if nullable(left child) then for each x in last(right child) do follow(v,x) = follow(right child,x) U first(left child);first(v) := first(left child) U first(right child);last(v) := first(left child) U first(right child);We also build an automaton for each AND group by building automata foreach of the members of the AND group using the above procedure andthen combine the members using:for each x in last(left child) do follow(v,x) = follow(left child,x) U first(right child);for each x in last(right child) do follow(v,x) = follow(right child,x) U first(left child);first(v) := first(left child) U first(right child);The content model is ambiguous just in case one of these automata isnon-deterministic. (Note that when checking determinism we need tocheck the `first' set as well as all the `follow' sets.)Why is this correct? Consider a primitive token in a member of an ANDgroup. There are two worst cases for ambiguity: firstly, when none ofthe other members of AND group have been matched; secondly, when justthe nullable members remain to be matched. The first case is notaffected by context of the AND group (unless the first case isidentical to the second case.)Note that inclusions are not relevant for the purposes of determiningthe ambiguity of content models. Otherwise the case in clause11.2.5.1: An element that can satisfy an element in the content model is considered to do so, even if the element is also an inclusion.could never arise.[1] Anne Brueggemann-Klein, Regular Expressions into Finite Automata,Universitaet Freiburg, Institut fur Informatik, 33 July 1991.*/#include "sgmlincl.h"/* Sets of states are represented by 0-terminated, ordered lists ofindexes in gbuf. */#define MAXSTATES (GRPGTCNT+2)#define listcat(x, y) strcat((char *)(x), (char *)(y))#define listcpy(x, y) strcpy((char *)(x), (char *)(y))/* Information about a content token. */struct contoken { UNCH size; UNCH nullable; UNCH *first; UNCH *last;};static VOID contoken P((int, int, struct contoken *));static VOID andgroup P((int, int, struct contoken *));static VOID orgroup P((int, int, struct contoken *));static VOID seqgroup P((int, int, struct contoken *));static VOID andambig P((int));static int listambig P((UNCH *));static VOID listmerge P((UNCH *, UNCH *));static struct contoken *newcontoken P((void));static VOID freecontoken P((struct contoken *));/* Dynamically allocated vector of follow sets. */static UNCH **follow;static UNCH *mergebuf; /* for use by listmerge *//* Set to non-zero if the content model is ambiguous. */static int ambigsw;/* Check the current content model (in gbuf) for ambiguity. */VOID ambig(){ struct contoken *s; int i; if (!follow) { /* We can't allocate everything in one chunk, because that would overflow a 16-bit unsigned if GRPGTCNT was 253. */ UNCH *ptr; follow = (UNCH **)rmalloc(MAXSTATES*sizeof(UNCH *)); follow[0] = 0; ptr = (UNCH *)rmalloc((MAXSTATES - 1)*MAXSTATES); for (i = 1; i < MAXSTATES; i++) { follow[i] = ptr; ptr += MAXSTATES; } mergebuf = (UNCH *)rmalloc(MAXSTATES); } for (i = 1; i < MAXSTATES; i++) follow[i][0] = 0; ambigsw = 0; s = newcontoken(); contoken(1, 1, s); ambigsw = ambigsw || listambig(s->first); freecontoken(s); for (i = 1; !ambigsw && i < MAXSTATES; i++) if (listambig(follow[i])) ambigsw = 1; if (ambigsw) mderr(137, (UNCH *)0, (UNCH *)0);}/* Free memory used for ambiguity checking. */VOID ambigfree(){ if (follow) { frem((UNIV)follow[1]); frem((UNIV)follow); frem((UNIV)mergebuf); follow = 0; }}/* Determine whether a list of primitive content tokens (eachrepresented by its index in gbuf) is ambiguous. */staticint listambig(list)UNCH *list;{ UNCH *p; int chars = 0; int rc = 0; for (p = list; *p; p++) { if ((gbuf[*p].ttype & TTMASK) == TTETD) { struct etd *e = gbuf[*p].tu.thetd; if (e->mark) { rc = 1; break; } e->mark = 1; } else { assert((gbuf[*p].ttype & TTMASK) == TTCHARS); if (chars) { rc = 1; break; } chars = 1; } } for (p = list; *p; p++) if ((gbuf[*p].ttype & TTMASK) == TTETD) gbuf[*p].tu.thetd->mark = 0; return rc;}/* Analyze a content token. The `checkand' argument is needed to ensurethat the algorithm is not exponential in the AND-group nesting depth.*/staticVOID contoken(m, checkand, res)int m; /* Index of content token in gbuf */int checkand; /* Non-zero if AND groups should be checked */struct contoken *res; /* Result */{ UNCH flags = gbuf[m].ttype; switch (flags & TTMASK) { case TTCHARS: case TTETD: res->first[0] = m; res->first[1] = 0; res->last[0] = m; res->last[1] = 0; res->size = 1; res->nullable = 0; break; case TTAND: if (checkand) andambig(m); andgroup(m, checkand, res); break; case TTOR: orgroup(m, checkand, res); break; case TTSEQ: seqgroup(m, checkand, res); break; default: abort(); } if (flags & TREP) { UNCH *p; for (p = res->last; *p; p++) listmerge(follow[*p], res->first); } if (flags & TOPT) res->nullable = 1;}/* Check an AND group for ambiguity. */staticVOID andambig(m)int m;{ int i, tnum; int lim; struct contoken *curr; struct contoken *next; tnum = gbuf[m].tu.tnum; assert(tnum > 0); curr = newcontoken(); next = newcontoken(); contoken(m + 1, 0, curr); i = m + 1 + curr->size; curr->size += 1; for (--tnum; tnum > 0; --tnum) { UNCH *p; contoken(i, 0, next); curr->size += next->size; i += next->size; for (p = curr->last; *p; p++) listcat(follow[*p], next->first); for (p = next->last; *p; p++) listmerge(follow[*p], curr->first); listcat(curr->first, next->first); listcat(curr->last, next->last); } lim = m + curr->size; for (i = m + 1; i < lim; i++) { if (listambig(follow[i])) ambigsw = 1; follow[i][0] = 0; } freecontoken(curr); freecontoken(next);}/* Handle an AND group. */staticVOID andgroup(m, checkand, res)int m;int checkand;struct contoken *res;{ int i, tnum; /* union of the first sets of nullable members of the group */ UNCH *nullablefirst; struct contoken *next; tnum = gbuf[m].tu.tnum; assert(tnum > 0); contoken(m + 1, checkand, res); nullablefirst = (UNCH *)rmalloc(MAXSTATES); if (res->nullable) listcpy(nullablefirst, res->first); else nullablefirst[0] = 0; i = m + 1 + res->size; res->size += 1; next = newcontoken(); for (--tnum; tnum > 0; --tnum) { UNCH *p; contoken(i, checkand, next); res->size += next->size; i += next->size; if (next->nullable) for (p = res->last; *p; p++) listcat(follow[*p], next->first); for (p = next->last; *p; p++) listmerge(follow[*p], nullablefirst); listcat(res->first, next->first); if (next->nullable) listcat(nullablefirst, next->first); listcat(res->last, next->last); res->nullable &= next->nullable; } frem((UNIV)nullablefirst); freecontoken(next);}/* Handle a SEQ group. */staticVOID seqgroup(m, checkand, res)int m;int checkand;struct contoken *res;{ int i, tnum; struct contoken *next; tnum = gbuf[m].tu.tnum; assert(tnum > 0); contoken(m + 1, checkand, res); i = m + 1 + res->size; res->size += 1; next = newcontoken(); for (--tnum; tnum > 0; --tnum) { UNCH *p; contoken(i, checkand, next); res->size += next->size; i += next->size; for (p = res->last; *p; p++) listcat(follow[*p], next->first); if (res->nullable) listcat(res->first, next->first); if (next->nullable) listcat(res->last, next->last); else listcpy(res->last, next->last); res->nullable &= next->nullable; } freecontoken(next);}/* Handle an OR group. */staticVOID orgroup(m, checkand, res)int m;int checkand;struct contoken *res;{ int i, tnum; struct contoken *next; tnum = gbuf[m].tu.tnum; assert(tnum > 0); contoken(m + 1, checkand, res); i = m + 1 + res->size; res->size += 1; next = newcontoken(); for (--tnum; tnum > 0; --tnum) { contoken(i, checkand, next); res->size += next->size; i += next->size; listcat(res->first, next->first); listcat(res->last, next->last); res->nullable |= next->nullable; } freecontoken(next);}/* Merge the second ordered list into the first. */staticVOID listmerge(p, b)UNCH *p, *b;{ UNCH *a = mergebuf; strcpy((char *)a, (char *)p); for (;;) { if (*a) { if (*b) { if (*a < *b) *p++ = *a++; else if (*a > *b) *p++ = *b++; else a++; } else *p++ = *a++; } else if (*b) *p++ = *b++; else break; } *p = '\0';}staticstruct contoken *newcontoken(){ struct contoken *p = (struct contoken *)rmalloc(sizeof(struct contoken) + MAXSTATES*2); p->first = (UNCH *)(p + 1); p->last = p->first + MAXSTATES; return p;}staticVOID freecontoken(p)struct contoken *p;{ frem((UNIV)p);}/*Local Variables:c-indent-level: 5c-continued-statement-offset: 5c-brace-offset: -5c-argdecl-indent: 0c-label-offset: -5End:*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -