📄 pars2.c
字号:
} default: /* RCR_: Repeat char and return. */ break; } if (sw.swambig) ambig(); /* Check content model for ambiguity. */ return gbuf;}/* PARSEGCM: Collect token headers (struct thdr) into a group (array). An etd is defined for each GI (if none exists) and its pointer is stored in the header. The function is called recursively.*/struct thdr *parsegcm(pcb, pgh, gbuf)struct parse *pcb; /* Current parse control block. */struct thdr *pgh; /* Current group header in group buffer. */struct thdr *gbuf; /* Header for outermost group (model). */{#define MCON gbuf->ttype /* Model type (content attributes). */ struct thdr *pg=pgh; /* Current group token. */ struct thdr *pgsv=pgh; /* Saved current token for occ indicator. */ int optcnt = 0; /* Count of optional tokens in group. */ int essv = es; /* Entity stack level when grp started. */ while (gbuf->tu.tnum<=GRPGTCNT && pgh->tu.tnum<=GRPCNT && parse(pcb)!=GRPE) switch (pcb->action) { case NAS_: /* GI name: get its etd and store it. */ ++gbuf->tu.tnum; ++pgh->tu.tnum; (pgsv = ++pg)->ttype = TTETD; pg->tu.thetd = etddef(parsenm(tbuf, NAMECASE)); SET(MCON, MGI); continue; case RNS_: /* Reserved name started (#PCDATA). */ parsenm(tbuf, NAMECASE); if (ustrcmp(tbuf+1, key[KPCDATA])) { mderr(116, ntoa(gbuf->tu.tnum), tbuf+1); return (struct thdr *)0; } /* If #PCDATA is the first non-group token, model is a phrase. */ if (!MCON) SET(MCON, MPHRASE); case DTAG: /* Data tag template ignored; treat as #PCDATA. */ if (pcb->action==DTAG) SET(pgh->ttype, TTSEQ); /* DTAG is SEQ grp. */ ++gbuf->tu.tnum; ++pgh->tu.tnum; (++pg)->ttype = TTCHARS+TOREP;/* #PCDATA is OPT and REP. */ pg->tu.thetd = ETDCDATA; ++optcnt; /* Ct opt tokens to see if grp is opt.*/ SET(MCON, MCHARS); continue; case GRP_: /* Group started. */ ++gbuf->tu.tnum; ++pgh->tu.tnum; (pgsv = ++pg)->ttype = 0; /* Type will be set by connector. */ pg->tu.tnum = 0; /* Group has number instead of etd. */ if (++grplvl>GRPLVL) { mderr(115, ntoa(gbuf->tu.tnum), (UNCH *)0); return (struct thdr *)0; } pg = parsegcm(pcb, pg, gbuf); if (!pg) return (struct thdr *)0; if (GET(pgsv->ttype, TOPT)) ++optcnt; /* Indicate nested opt grp. */ --grplvl; continue; case OREP: /* OREP occurrence indicator for current token.*/ SET(pgsv->ttype, TREP|TXREP); /* Now treat like OPT. */ case OPT: /* OPT occurrence indicator for current token. */ SET(pgsv->ttype, TXOPT); if (GET(pgsv->ttype, TOPT)) continue; /* Exit if nested opt grp. */ SET(pgsv->ttype, TOPT); ++optcnt; /* Count opt tokens to see if grp is optional. */ continue; case REP: /* REP occurrence indicator for current token. */ SET(pgsv->ttype, TREP|TXREP); continue; case OR: /* OR connector found. */ if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTOR); else if (GET(pgh->ttype, TTAND)!=TTOR) mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0); continue; case AND: /* AND connector found. */ if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTAND); else if (GET(pgh->ttype, TTAND)!=TTAND) mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0); continue; case SEQ: /* SEQ connector found. */ if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTSEQ); else if (GET(pgh->ttype, TTAND)!=TTSEQ) mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0); continue; case EE_: /* Entity ended (correctly or incorrectly). */ if (es<essv) {synerr(37, pcb); essv = es;} continue; case PIE_: /* PI entity reference (not permitted). */ entpisw = 0; /* Reset PI entity indicator. */ synerr(59, pcb); continue; default: /* Syntax errors return in disgrace. */ synerr(37, pcb); return (struct thdr *)0; } if (pgh->tu.tnum>GRPCNT) { mderr(113, ntoa(gbuf->tu.tnum), (UNCH *)0); return (struct thdr *)0; } if (gbuf->tu.tnum>GRPGTCNT) { mderr(114, ntoa(gbuf->tu.tnum), (UNCH *)0); return (struct thdr *)0; } if (pgh->tu.tnum==1) SET(pgh->ttype, TTSEQ); /* Unit grp is SEQ. */ /* An optional token in an OR group makes the group optional. */ if (GET(pgh->ttype, TTMASK)==TTOR && optcnt) SET(pgh->ttype, TOPT); /* If all tokens in any group are optional, so is the group. */ if (pgh->tu.tnum<=optcnt) SET(pgh->ttype, TOPT); if (es!=essv) synerr(37, pcb); return pg; /* Return pointer to GRPS token. */}/* PARSENM: Parser for SGML names, which can be translated with LEXTRAN. The input is read from the entity stack. CC is 1st char of name. Returns a pointer to the parsed name.*/UNCH *parsenm(tbuf, nc)UNCH *tbuf; /* Buffer for name: >=NAMELEN+2. */int nc; /* Namecase translation: 1=yes; 0=no. */{ UNCH len; /* Length of name (incl EOS & length byte). */ *(tbuf + (len = 1) ) = nc ? lextran[*FPOS] : *FPOS; while ((NEWCC, (int)lextoke[*FPOS]>=NMC) && (len<NAMELEN)) { TRACETKN(NMC, lextoke); if (lextoke[*(tbuf + ++len) = (nc ? lextran[*FPOS] : *FPOS)]==EOB) { --len; entget(); } } REPEATCC; /* Put back the non-token character. */ *(tbuf + ++len) = EOS; /* Terminate name with standard EOS. */ *tbuf = ++len; /* Store length ahead of name. */ return tbuf;}/* PARSETKN: Parser for start-tag attribute value tokens. First character of token is already in *FPOS. Returns a pointer to the parsed token. Parsed token has EOS but no length byte.*/#ifdef USE_PROTOTYPESUNCH *parsetkn(UNCH *tbuf, UNCH scope, int maxlen)#elseUNCH *parsetkn(tbuf, scope, maxlen)UNCH *tbuf; /* Buffer for token: >=maxlen+1. */UNCH scope; /* Minimum lexical class allowed. */int maxlen; /* Maximum length of a token. */#endif{ int i = 1; tbuf[0] = *FPOS; while (i < maxlen) { NEWCC; if (lextoke[*FPOS] < scope) { REPEATCC; break; } TRACETKN(scope, lextoke); if (*FPOS == EOBCHAR) entget(); else tbuf[i++] = *FPOS; } tbuf[i] = EOS; return tbuf;}/* PARSESEQ: Parser for blank sequences (i.e., space and TAB characters ). First character of sequence is already in *FPOS.*/VOID parseseq(tbuf, maxlen)UNCH *tbuf; /* Buffer for storing found sequence. */int maxlen; /* Maximum length of a blank sequence. */{ tbuf[0] = *FPOS; datalen = 1; for (;;) { NEWCC; if (*FPOS == EOBCHAR) { entget(); continue; } if ((lextoke[*FPOS] != SEP && *FPOS != SPCCHAR) || datalen >= maxlen) break; tbuf[datalen++] = *FPOS; TRACETKN(SEP, lextoke); }}/* S2VALNM: Parser for attribute values that are tokenized like names. The input is read from a string (hence S ("string") 2 ("to") VALNM). It stops at the first bad character. Returns a pointer to the created name.*/#ifdef USE_PROTOTYPESUNCH *s2valnm(UNCH *nm, UNCH *s, UNCH scope, int translate)#elseUNCH *s2valnm(nm, s, scope, translate)UNCH *nm; /* Name to be created. */UNCH *s; /* Source string to be parsed as name. */UNCH scope; /* Minimum lexical class allowed. */int translate; /* Namecase translation: 1=yes; 0=no. */#endif{ UNCH len = 0; /* Length of name (incl EOS and length). */ for (; (int)lextoke[*s] >= scope && len < NAMELEN; s++) nm[++len] = translate ? lextran[*s] : *s; nm[++len] = EOS; /* Terminate name with standard EOS. */ *nm = ++len; /* Store length ahead of name. */ return nm;}/* PARSEVAL: Parser for attribute values. The input is read from a string and tokenized in a buffer. The input is terminated by EOS. Each token is preceded by its actual length; there is no EOS. If an error occurs while parsing, or if a token doesn't conform, set the token count to 0 to show that value was not tokenized and return the error code. After successful parse, return buffer length and 0 error code. The number of tokens found is set in external variable tokencnt.*/int parseval(s, atype, tbuf)UNCH *s; /* Source string to be parsed as token list. */UNS atype; /* Type of token list expected. */UNCH *tbuf; /* Work area for tokenization. */{ int t; UNCH *pt = tbuf; pcbval.newstate = 0; tokencnt = 0; while (1) { for (;;) { pcbval.input = lextoke[*s]; pcbval.state = pcbval.newstate; pcbval.newstate = (*(pcbval.ptab + pcbval.state)) [pcbval.input]; pcbval.action = (*(pcbval.ptab + pcbval.state+1)) [pcbval.input]; TRACEVAL(&pcbval, atype, s, tokencnt); if (pcbval.action != NOPA) break; s++; } switch (pcbval.action) { case INVA: /* Invalid character; terminate parse. */ if (*s == '\0') goto alldone; /* Normal termination. */ tokencnt = 0; /* Value was not tokenized. */ return(14); case LENA: /* Length limit of token exceeded; end parse. */ tokencnt = 0; /* Value was not tokenized. */ return(15); default: /* Token begun: NUMA, NASA, or NMTA. */ break; } ++tokencnt; /* One token per iteration. */ switch (atype) { case AENTITY: if (tokencnt>1) {tokencnt = 0; return(16);} case AENTITYS: if (pcbval.action!=NASA) {tokencnt = 0; return(17);} s2valnm(pt, s, NMC, ENTCASE); break; case AID: case AIDREF: case ANAME: case ANOTEGRP: if (tokencnt>1) {tokencnt = 0; return(16);} case AIDREFS: case ANAMES: if (pcbval.action!=NASA) {tokencnt = 0; return(17);} s2valnm(pt, s, NMC, NAMECASE); break; case ANMTGRP: case ANMTOKE: if (tokencnt>1) {tokencnt = 0; return(16);} case ANMTOKES: /* No test needed because NMTA, NUMA and NASA are all valid. */ s2valnm(pt, s, NMC, NAMECASE); break; case ANUMBER: if (tokencnt>1) {tokencnt = 0; return(16);} case ANUMBERS: if (pcbval.action!=NUMA) {tokencnt = 0; return(17);} s2valnm(pt, s, NU, NAMECASE); t = lextoke[s[*pt - 2]]; if (t == NMS || t == NMC) {tokencnt = 0; return(17);} break; case ANUTOKE: if (tokencnt>1) {tokencnt = 0; return(16);} case ANUTOKES: if (pcbval.action!=NUMA) {tokencnt = 0; return(17);} s2valnm(pt, s, NMC, NAMECASE); break; } *pt -= 2; s += *pt; pt += *pt + 1; } alldone: *pt++ = EOS; if (*tbuf == '\0') return 25; if (atype < ATKNLIST) *tbuf += 2; /* include length and EOS */ return 0;}/*Local Variables:c-indent-level: 5c-continued-statement-offset: 5c-brace-offset: -5c-argdecl-indent: 0c-label-offset: -5comment-column: 30End:*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -