📄 pars1.c
字号:
#include "sgmlincl.h"         /* #INCLUDE statements for SGML parser. */
#define GI (tags[ts].tetd->etdgi+1)              /* GI of current element. */
#define NEWGI (newetd->etdgi+1)                  /* GI of new tag. */

/* Prototypes for file-local helpers; their definitions are not in this
   part of the file.
*/
static VOID doincludes P((void));
static int pentname P((char *));
static struct mpos *newmpos P((void));
static VOID commbufs P((void));
static VOID checkdtd P((void));

/* PARSECON: Parse content of an element.

   Before parsing anything, the pending-state switches are tested in
   order (eodsw, didreq with conrefsw&TAGREF, etagimct, dostag, conrefsw,
   eofsw); each of them causes an immediate return.  Otherwise the
   function loops, calling parse(pcb) and dispatching on pcb->action,
   until one of the cases returns.

   Returns one of: EOD_, ETG_, the saved datarc, PIS_, RSR_, MSS_, a
   positive code from shortref(), whatever stag(), stagetd() or nstetd()
   returned, or (default case) pcb->action itself.

   Several cases share code through the labels rcc, nrcc, sr6 and srproc
   and through deliberate fallthrough; the statement order matters.
*/
int parsecon(tbuf, pcb)
UNCH *tbuf;                   /* Work area for tokenization. */
struct parse *pcb;            /* Parse control block for this parse. */
{
     int srn;                 /* SHORTREF delimiter number (1-32). */
     int refrc;               /* Return code from shortref or netetd. */

     TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq);
     /* eodsw set: report EOD_ without parsing anything. */
     if (eodsw) return(EOD_);
     /* didreq set and conrefsw has TAGREF: clear didreq and take the
        destack/ETG_ path at conr below. */
     if (didreq && (conrefsw & TAGREF)) {didreq = 0; goto conr;}
     /* etagimct>0: decrement it, record in etagimsw whether any remain,
        destack and return ETG_. */
     if (etagimct>0) {etagimsw = --etagimct ? 1 : 0; destack(); return(ETG_);}
     /* dostag set: restore conrefsw and etisw from their saved copies, then
        either return the saved datarc (charmode) or hand it to stag(). */
     if (dostag) {
          conrefsw = conrefsv;
          etisw = etiswsv;
          if (charmode) {dostag = 0; return datarc;}
          return stag(datarc);
     }
     if (conrefsw) {
          conr:               /* Also reached by the goto above. */
          destack();
          conrefsw = 0;
          return ETG_;
     }
     else if (eofsw) return(EOD_);

     datarc = 0;
     while (1) {
          parse(pcb);
          srn = (int)pcb->action - SRMIN;    /* Just in case it's a SHORTREF. */
          switch (pcb->action) {
          case DCE_:          /* Data character in element content. */
               /* The data character might be a non-SGML character so
                  reprocess it using pcbconm. */
               REPEATCC;
               pcb = conpcb = &pcbconm;
               pcb->newstate = pcbcnet;
               continue;
          case DAS_:          /* Current character begins data. */
               data = FPOS;
               continue;

          case NLF_:          /* NET or SR returns data in lookahead buffer. */
               datalen = (UNS)(ptcon - data);
               REPEATCC;      /* rcc below performs a second REPEATCC. */
               goto rcc;

          case LAF_:          /* Return data in lookahead buffer: mixed. */
               datalen = (UNS)(ptcon+1 - data);
               goto rcc;

          case NON_:          /* Single nonchar in nonchbuf. */
               datalen = 2; data = nonchbuf;
               goto nrcc;

          case DAR_:          /* Return data except for last char. */
               REPEATCC;
               /* fall through */
          case DAF_:          /* Return data in source entity buffer. */
               datalen = (UNS)(FPOS - data);
          rcc:
               REPEATCC;
               /* fall through */
          case DEF_:          /* Return data in data entity. */
          nrcc:
               /* Common data-return path: datarc becomes DAF_ for every
                  case that arrives here. */
               datarc = DAF_;
               if (pcb==&pcbcone) {
                    pcbconm.newstate = pcbcnet;
                    conpcb = &pcbconm;
               }
               if (charmode) return(datarc);
               /* Not charmode: pass the data to stag() with ETDCDATA as
                  the new etd. */
               stagmin = MINNONE; stagreal = newetd = ETDCDATA;
               return(stag(datarc));

          case LAS_:          /* Start lookahead buffer with current char. */
               *(ptcon = data = tbuf+1) = *FPOS;
               continue;

          case LAM_:          /* Move character to lookahead buffer. */
               *++ptcon = *FPOS;
               continue;

          case STG_:          /* Process non-null start-tag. */
               CTRSET(tagctr);               /* Start counting tag length. */
               tages = es;
               parsenm(tbuf, NAMECASE);      /* Get the GI. */
               newetd = etdref(tbuf);
               /* Attributes are parsed only when the etd exists and has an
                  attribute definition list. */
               if (newetd && newetd->adl) {
                    parseatt(newetd->adl, tbuf);
                    adlval((int)ADN(al), newetd);
               }
               parsetag(&pcbstag);           /* Parse the tag ending. */
               if ((CTRGET(tagctr)-tagdelsw)>=TAGLEN)
                    sgmlerr(66, &pcbstag, (UNCH *)0, (UNCH *)0);
               /* etdref returned null: report error 132 with the name and
                  keep parsing. */
               if (!newetd) {
                    sgmlerr(132, pcb, tbuf+1, (UNCH *)0);
                    continue;
               }
               return(stagetd(&pcbstag));

          case NST_:          /* Process null start-tag. */
               return nstetd();

          case ETC_:          /* End-tag in CDATA or RCDATA. */
          case ETG_:          /* Process non-null end-tag. */
               newetd = etdref(parsenm(tbuf, NAMECASE)); /* Get the GI. */
               parsetag(&pcbetag);                       /* Parse tag end. */
               if (!newetd)                              /* Error: undefined.*/
                    sgmlerr(11, &pcbetag, tbuf+1, (UNCH *)0);
               else if (etagetd(&pcbetag)>=0) return ETG_;/* Open element. */
               if (pcb->action!=ETC_) continue;
               /* Tag is undefined or not for an open element and we are in
                  a CDATA or RCDATA element; issue message and treat as
                  null end-tag (</>). */
               sgmlerr(57, &pcbetag, (UNCH *)0, (UNCH *)0);
               /* fall through */
          case NET_:          /* Process null end-tag. */
               /* netetd returning 0 means no end-tag was processed; keep
                  parsing in that case. */
               if ((refrc = netetd(conpcb))!=0) return ETG_;
               continue;

          case NED_:          /* Process null end-tag delimiter. */
               etagmin = MINNET;
               newetd = etagreal = ETDNET;
               etagimct = etag();
               etagimsw = etagimct ? 1 : 0; destack();
               return ETG_;
          case GTR_:
               /* entget() returning anything other than -1: restart data
                  at the current position and keep parsing. */
               if (entget()!=-1) {
                    data = FPOS;
                    continue;
               }
               /* fall through */
          case EOD_:          /* End of primary file. */
               if (ts<1) return(EOD_);      /* Normal end: stack is empty. */
               etagimct = ts-1;   /* Treat as end-tag for top tag on stack. */
               etagmin = MINETAG; etagreal = tags[0].tetd;
               destack();
               eofsw = 1;         /* Return EOD_ after destacking all. */
               return ETG_;

          /* Short references ending with blanks:
             If the blank sequence is followed by RE, go do SR7 or SR6.
             If the entity is undefined and we are in mixed content,
             the blanks must be returned as data.  If not, they
             can be ignored.
          */
          case SR9_:          /* Process SR9 (two or more blanks). */
               REPEATCC;      /* Make first blank the CC. */
               /* fall through */
          case SR4_:          /* Process SR4 (RS, blanks). */
               parseseq(tbuf, BSEQLEN); /* Squeeze out all blanks. */
               if (*FPOS=='\r') {srn = (srn==9) ? 7 : 6; data = tbuf; goto sr6;}
               else REPEATCC;
               if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
               if (refrc>0) return refrc;
               if (refrc==ENTUNDEF && pcb==&pcbconm)
                    {data = tbuf; goto nrcc;}
               continue;

          /* Short references ending with RE:
             If the reference is defined, the RE is ignored.
             For RE and RS RE, no special action is needed if the
             reference is undefined, as the RE will be processed
             immediately as the current character.
             For B RE and RS B RE, the input is primed with a special
             character that will be treated as an RE that cannot be
             a short reference.
          */
          case SR7_:          /* Process SR7 (blanks, RE). */
               datalen = (UNS)(FPOS - data);
               /* fall through */
          case SR2_:          /* Process SR2 (RE). */
          case SR5_:          /* Process SR5 (RS, RE). */
          sr6:                /* Process SR6 (RS, blanks, RE). */
               if ((refrc = shortref(srn, pcb))!=ENTUNDEF) {
                    if (refrc==DEF_) goto nrcc;   /* Defined: data entity. */
                    if (refrc>0) return refrc;    /* Defined: tag entity. */
                    continue;                     /* Defined: not tag. */
               }
               if (pcb!=&pcbconm) continue;       /* Not mixed; ignore chars. */
               if (srn>=6)                        /* Return blanks as data. */
                    {*FPOS = lex.d.genre; REPEATCC; goto nrcc;}
               /* fall through */
          case REF_:          /* Undefined SR with RE; return record end. */
               datarc = REF_;
               if (charmode) return(datarc);
#if 0
               /* The standard says this situation can force a tag.
                  See 323:3-6, 412:1-7. */
               /* If RE would be ignored, don't treat it as start-tag
                  because it could force a required tag; but do change
                  state to show that an RE was ignored.
               */
               if (scbsgml[pss].snext==scbsgmst) {
                    scbsgml[pss].snext = scbsgmnr;
                    TRACEGML(scbsgml, pss, conactsw, conact);
                    continue;
               }
#endif
               stagmin = MINNONE; stagreal = newetd = ETDCDATA;
               return(stag(datarc));

          case SR3_:          /* Process SR3 (RS). */
               REPEATCC;
               if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
               if (refrc>0) return refrc;
               continue;

          case RBR_:          /* Two right brackets */
               srn = 26;      /* Handled as SR26 from here on. */
               REPEATCC;
               /* fall through */
          case SR1_:          /* Process SR1 (TAB). */
          case SR8_:          /* Process SR8 (space). */
          case SR19:          /* Process SR19 (-). */
          case SR26:          /* Process SR26 (]). */
               REPEATCC;
               goto srproc;

          case FCE_:          /* Process free character (SR11-18, SR21-32). */
               /* Look the current character up to get its SR number. */
               fce[0] = *FPOS;
               srn = mapsrch(&lex.s.dtb[lex.s.fce], fce);
               /* fall through */
          case SR10:          /* Process SR10 ("). */
          case SR11:          /* Process SR11 (#). */
          case SR20:          /* Process SR20 (presumably "--": see the
                                 two-character note at hyp2 below). */
          case SR25:          /* Process SR25 ([). */
          srproc:
               if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
               if (refrc>0) return refrc;
               if (refrc==ENTUNDEF) {             /* Treat the SR as data. */
                    data = FPOS - (srn==lex.s.hyp2);/* Two data chars if SR20.*/
                    if (pcb!=&pcbconm) {          /* If not in mixed content: */
                         if (srn>=lex.s.data) {   /* Change PCB. */
                              pcb = conpcb = &pcbconm;
                              pcb->newstate = pcbcnda;
                         }
                    }
                    else pcb->newstate = pcbcnda;/* Now in data found state. */
               }
               continue;

          case ERX_:          /* Entity ref in RCDATA: cancel ending delims.*/
               lexcon[lex.d.tago] = lex.l.fre;
               lexcon[lex.d.net] = lex.l.nonet;
               lexlms[lex.d.msc] = lex.l.fre;
               continue;

          case EE_:           /* Entity end in RCDATA: check nesting. */
               /* es below the saved level: report error 37 and lower the
                  saved level to match. */
               if (es<rcessv) {synerr(37, pcb); rcessv = es;}
               /* If back at top level, re-enable the ending delimiters. */
               if (es==rcessv) {
                    lexcon[lex.d.tago] = lex.l.tago;
                    lexcon[lex.d.net] = etictr ? lex.l.net : lex.l.nonet;
                    lexlms[lex.d.msc] = lex.l.msc;
               }
               continue;

          case PIE_:          /* PI entity: same as PIS_. */
               return PIS_;

          case RSR_:               /* Record start: ccnt=0; ++rcnt.*/
               ++RCNT; CTRSET(RSCC);
               return RSR_;

          case MSS_:
               /* Error 217 is reported when the tag stack is empty (ts==0);
                  MSS_ is returned either way. */
               if (ts == 0) synerr(217, pcb);
               return MSS_;

          default:
               /* Any other action is returned unchanged (the original note
                  lists MD_ MDC_ MSE_ PIS_; MSS_ has its own case above). */
               return (int)pcb->action;
          }
     }
}

/* STAGETD: Process start-tag etd.

   If newetd has no etdmod, reports error 43 with its GI, increments
   ds.etdercnt and calls etdset() on it before continuing.  Then sets
   stagmin to MINNONE and stagreal to newetd and returns stag(0).
*/
int stagetd(pcb)
struct parse *pcb;            /* Parse control block for this parse. */
{
     if (!newetd->etdmod) {
          sgmlerr(43, pcb, newetd->etdgi+1, (UNCH *)0);
          ++ds.etdercnt;
          etdset(newetd, (UNCH)SMO+EMO+ETDOCC, &undechdr,
                 (PETD *)0, (PETD *)0, (PECB *)0);
          TRACEETD(newetd);
     }
     stagmin = MINNONE; stagreal = newetd;
     return stag(0);
}

/* NSTETD: Process null start-tag etd.

   Chooses newetd as follows:
     - sd.omittag set and ts > 0:         the etd at tags[ts];
     - sd.omittag clear and lastetd != 0: lastetd;
     - otherwise:                         tags[0].tetd->etdmod[2].tu.thetd.
   Then sets stagmin to MINNULL, stagreal to ETDNULL, clears etisw and
   returns stag(0).
*/
int nstetd()
{
     if (sd.omittag && ts > 0)
          newetd = tags[ts].tetd;
     else if (!sd.omittag && lastetd != 0)
          newetd = lastetd;
     else
          newetd = tags[0].tetd->etdmod[2].tu.thetd;
     stagmin = MINNULL; stagreal = ETDNULL;
     etisw = 0;
     return stag(0);
}

/* ETAGETD: Process end-tag etd.

   Sets etagmin to MINNONE and etagreal to newetd, then stores the result
   of etag() in etagimct.  A negative result is reported as E_ETAG (with
   the new GI and the GI at tags[ts]) and returned as is; parsecon tests
   the return value with >=0.  Otherwise etagimsw is set to whether
   etagimct is nonzero, the stack is popped with destack() and ETG_ is
   returned.
*/
int etagetd(pcb)
struct parse *pcb;            /* Parse control block for this parse. */
{
     etagmin = MINNONE; etagreal = newetd;
     if ((etagimct = etag())<0) {
          sgmlerr(E_ETAG, pcb, NEWGI, tags[ts].tetd->etdgi+1);
          return etagimct;
     }
     etagimsw = etagimct ? 1 : 0; destack();
     return ETG_;
}

/* NETETD: Process null end-tag etd.

   With ts<1, reports error 51 and returns 0.  Otherwise sets etagmin to
   MINNULL, etagreal to ETDNULL, clears etagimsw and calls destack().
   NOTE(review): the rest of this function (including its final return
   and closing brace) is not present in this excerpt.
*/
int netetd(pcb)
struct parse *pcb;            /* Parse control block for this parse. */
{
     if (ts<1) {
          sgmlerr(51, pcb, (UNCH *)0, (UNCH *)0);
          return 0;
     }
     etagmin = MINNULL; etagreal = ETDNULL;
     etagimsw = 0; destack();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -