📄 sgml.c
字号:
*/ return &HTTag_unrecognized; } return NULL;}/*________________________________________________________________________** Public Methods*//* Could check that we are back to bottom of stack! @@ *//* Do check! - FM *//* */PRIVATE void SGML_free ARGS1( HTStream *, context){ int i; HTElement * cur; HTTag * t; /* ** Free the buffers. - FM */ FREE(context->recover); FREE(context->url); FREE(context->csi); FREE(context->include); FREE(context->active_include); /* ** Wind down stack if any elements are open. - FM */ while (context->element_stack) { cur = context->element_stack; t = cur->tag; context->element_stack = cur->next; /* Remove from stack */ pool_free(cur);#ifdef USE_PRETTYSRC if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */#endif (*context->actions->end_element)(context->target, NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)), (char **)&context->include); FREE(context->include); } /* ** Finish off the target. - FM */ (*context->actions->_free)(context->target); /* ** Free the strings and context structure. - FM */ HTChunkFree(context->string); for (i = 0; i < MAX_ATTRIBUTES; i++) FREE_extra(context->value[i]); FREE(context);#ifdef USE_PRETTYSRC sgml_in_psrc_was_initialized = FALSE;#endif}PRIVATE void SGML_abort ARGS2( HTStream *, context, HTError, e){ int i; HTElement * cur; /* ** Abort the target. - FM */ (*context->actions->_abort)(context->target, e); /* ** Free the buffers. - FM */ FREE(context->recover); FREE(context->include); FREE(context->active_include); FREE(context->url); FREE(context->csi); /* ** Free stack memory if any elements were left open. - KW */ while (context->element_stack) { cur = context->element_stack; context->element_stack = cur->next; /* Remove from stack */ pool_free(cur); } /* ** Free the strings and context structure. - FM */ HTChunkFree(context->string); for (i = 0; i < MAX_ATTRIBUTES; i++) FREE_extra(context->value[i]); FREE(context);#ifdef USE_PRETTYSRC sgml_in_psrc_was_initialized = FALSE;#endif}/* Read and write user callback handle** -----------------------------------**** The callbacks from the SGML parser have an SGML context parameter.** These calls allow the caller to associate his own context with a** particular SGML context.*/#ifdef CALLERDATAPUBLIC void* SGML_callerData ARGS1( HTStream *, context){ return context->callerData;}PUBLIC void SGML_setCallerData ARGS2( HTStream *, context, void*, data){ context->callerData = data;}#endif /* CALLERDATA */PRIVATE void SGML_character ARGS2( HTStream *, context, char, c_in){ CONST SGML_dtd *dtd = context->dtd; HTChunk *string = context->string; CONST char * EntityName; HTTag * testtag = NULL; BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */ int testlast;#ifdef CJK_EX unsigned char c;#else char c;#endif char saved_char_in = '\0'; /* ** Now some fun with the preprocessor. ** Use copies for c and unsign_c == clong, so that ** we can revert back to the unchanged c_in. - KW */#define unsign_c clong c = c_in; clong = UCH(c); /* a.k.a. unsign_c */ if (context->T.decode_utf8) { /* ** Combine UTF-8 into Unicode. ** Incomplete characters silently ignored. ** From Linux kernel's console.c. - KW */ if (TOASCII(UCH(c)) > 127) { /* S/390 -- gil -- 0710 */ /* ** We have an octet from a multibyte character. - FM */ if (context->utf_count > 0 && (TOASCII(c) & 0xc0) == 0x80) { context->utf_char = (context->utf_char << 6) | (TOASCII(c) & 0x3f); context->utf_count--; *(context->utf_buf_p) = c; (context->utf_buf_p)++; if (context->utf_count == 0) { /* ** We have all of the bytes, so terminate ** the buffer and set 'clong' to the UCode_t ** value. - FM */ *(context->utf_buf_p) = '\0'; clong = context->utf_char; if (clong < 256) { c = ((char)(clong & 0xff)); } goto top1; } else { /* ** Wait for more. - KW */ return; } } else { /* ** Start handling a new multibyte character. - FM */ context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = c; (context->utf_buf_p)++; if ((c & 0xe0) == 0xc0) { context->utf_count = 1; context->utf_char = (c & 0x1f); } else if ((c & 0xf0) == 0xe0) { context->utf_count = 2; context->utf_char = (c & 0x0f); } else if ((c & 0xf8) == 0xf0) { context->utf_count = 3; context->utf_char = (c & 0x07); } else if ((c & 0xfc) == 0xf8) { context->utf_count = 4; context->utf_char = (c & 0x03); } else if ((c & 0xfe) == 0xfc) { context->utf_count = 5; context->utf_char = (c & 0x01); } else { /* ** Garbage. - KW */ context->utf_count = 0; context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } /* ** Wait for more. - KW */ return; } } else { /* ** Got an ASCII char. - KW */ context->utf_count = 0; context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; /* goto top; */ } } /* end of context->T.decode_utf8 S/390 -- gil -- 0726 */#ifdef NOTDEFINED /* ** If we have a koi8-r input and do not have ** koi8-r as the output, save the raw input ** in saved_char_in before we potentially ** convert it to Unicode. - FM */ if (context->T.strip_raw_char_in) saved_char_in = c;#endif /* NOTDEFINED */ /* ** If we want the raw input converted ** to Unicode, try that now. - FM */ if (context->T.trans_to_uni && ((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) || /* S/390 -- gil -- 0744 */ (unsign_c < ' ' && unsign_c != 0 && context->T.trans_C0_to_uni))) { /* ** Convert the octet to Unicode. - FM */ clong = UCTransToUni(c, context->inUCLYhndl); if (clong > 0) { saved_char_in = c; if (clong < 256) { c = FROMASCII((char)clong); } } goto top1; } else if (unsign_c < ' ' && unsign_c != 0 && /* S/390 -- gil -- 0768 */ context->T.trans_C0_to_uni) { /* ** This else if may be too ugly to keep. - KW */ if (context->T.trans_from_uni && (((clong = UCTransToUni(c, context->inUCLYhndl)) >= ' ') || (context->T.transp && (clong = UCTransToUni(c, context->inUCLYhndl)) > 0))) { saved_char_in = c; if (clong < 256) { c = FROMASCII((char)clong); } goto top1; } else { uck = -1; if (context->T.transp) { uck = UCTransCharStr(replace_buf, 60, c, context->inUCLYhndl, context->inUCLYhndl, NO); } if (!context->T.transp || uck < 0) { uck = UCTransCharStr(replace_buf, 60, c, context->inUCLYhndl, context->outUCLYhndl, YES); } if (uck == 0) { return; } else if (uck < 0) { goto top0a; } c = replace_buf[0]; if (c && replace_buf[1]) { if (context->state == S_text) { PUTS(replace_buf); return; } StrAllocCat(context->recover, replace_buf + 1); } goto top0a; } /* Next line end of ugly stuff for C0. - KW */ } else { /* end of context->T.trans_to_uni S/390 -- gil -- 0791 */ goto top0a; } /* ** At this point we have either unsign_c a.k.a. clong in ** Unicode (and c in latin1 if clong is in the latin1 range), ** or unsign_c and c will have to be passed raw. - KW *//*** We jump up to here from below if we have** stuff in the recover, insert, or csi buffers** to process. We zero saved_char_in, in effect** as a flag that the octet in not that of the** actual call to this function. This may be OK** for now, for the stuff this function adds to** its recover buffer, but it might not be for** stuff other functions added to the insert or** csi buffer, so bear that in mind. - FM** Stuff from the recover buffer is now handled** as UTF-8 if we can expect that's what it is,** and in that case we don't come back up here. - kw*/top: saved_char_in = '\0';/*** We jump to here from above when we don't have** UTF-8 input, haven't converted to Unicode, and** want clong set to the input octet (unsigned)** without zeroing its saved_char_in copy (which** is signed). - FM*/top0a: *(context->utf_buf) = '\0'; clong = UCH(c);/*** We jump to here from above if we have converted** the input, or a multibyte sequence across calls,** to a Unicode value and loaded it into clong (to** which unsign_c has been defined), and from below** when we are recycling a character (e.g., because** it terminated an entity but is not the standard** semi-colon). The character will already have** been put through the Unicode conversions. - FM*/top1: /* ** Ignore low ISO 646 7-bit control characters ** if HTCJK is not set. - FM */ /* ** Works for both ASCII and EBCDIC. -- gil */ /* S/390 -- gil -- 0811 */ if (TOASCII(unsign_c) < 32 && c != '\t' && c != '\n' && c != '\r' && HTCJK == NOCJK) goto after_switch; /* ** Ignore 127 if we don't have HTPassHighCtrlRaw ** or HTCJK set. - FM */#define PASSHICTRL (context->T.transp || \ unsign_c >= LYlowest_eightbit[context->inUCLYhndl]) if (TOASCII(c) == 127 && /* S/390 -- gil -- 0830 */ !(PASSHICTRL || HTCJK != NOCJK)) goto after_switch; /* ** Ignore 8-bit control characters 128 - 159 if ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM */ if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 && /* S/390 -- gil -- 0847 */ !(PASSHICTRL || HTCJK != NOCJK)) goto after_switch; /* Almost all CJK characters are double byte but only Japanese * JIS X0201 Kana is single byte. To prevent to fail SGML parsing * we have to care them here. -- TH */ if ((HTCJK==JAPANESE) && (context->state==S_in_kanji) && !IS_JAPANESE_2BYTE(context->kanji_buf, UCH(c))) {#ifdef CONV_JISX0201KANA_JISX0208KANA if (IS_SJIS_X0201KANA(context->kanji_buf)) { unsigned char sjis_hi, sjis_lo; JISx0201TO0208_SJIS(context->kanji_buf, &sjis_hi, &sjis_lo); PUTC(sjis_hi); PUTC(sjis_lo); } else#endif PUTC(context->kanji_buf); context->state = S_text; } /* ** Handle character based on context->state. */ CTRACE2(TRACE_SGML, (tfp, "SGML before %s|%.*s|%c|\n", state_name(context->state), string->size, NonNull(string->data), UCH(c))); switch(context->state) { case S_in_kanji: /* ** Note that if we don't have a CJK input, then this ** is not the second byte of a CJK di-byte, and we're ** trashing the input. That's why 8-bit characters ** followed by, for example, '<' can cause the tag to ** be treated as text, not markup. We could try to deal ** with it by holding each first byte and then checking ** byte pairs, but that doesn't seem worth the overhead ** (see below). - FM */ context->state = S_text; PUTC(context->kanji_buf); PUTC(c); break; case S_tagname_slash: /* * We had something link "<name/" so far, set state to S_text * but keep context->slashedtag as as a flag; except if we get * '>' directly after the "<name/", and really have a tag for * that name in context->slashedtag, in which case keep state as * is and let code below deal with it. - kw */ if (!(c == '>' && context->slashedtag && TOASCII(unsign_c) < 127)) { context->state = S_text; } /* fall through in any case! */ case S_text: if (HTCJK != NOCJK && (TOASCII(c) & 0200) != 0) { /* S/390 -- gil -- 0864 */ /* ** Setting up for Kanji multibyte handling (based on ** Takuya ASADA's (asada@three-a.co.jp) CJK Lynx). ** Note that if the input is not in fact CJK, the ** next byte also will be mishandled, as explained ** above. Toggle raw mode off in such cases, or ** select the "7 bit approximations" display ** character set, which is largely equivalent ** to having raw mode off with CJK. - FM */ context->state = S_in_kanji; context->kanji_buf = c; break; } else if (HTCJK != NOCJK && TOASCII(c) == '\033') { /* S/390 -- gil -- 0881 */ /* ** Setting up for CJK escape sequence handling (based on ** Takuya ASADA's (asada@three-a.co.jp) CJK Lynx). - FM */ context->state = S_esc; PUTC(c); break; } if (c == '&' || c == '<') {#ifdef USE_PRETTYSRC if (psrc_view) { /*there is nothing useful in the element_stack*/ testtag = context->current_tag;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -