📄 html.h
字号:
/* html.h (c) 1998 (W3C) MIT, INRIA, Keio University See tidy.c for the copyright notice.*//* indentation modes */#define NO_INDENT 0#define BLOCK_INDENT 1#define SMART_INDENT 2/* character encodings */#define RAW 0#define ASCII 1#define LATIN1 2#define UTF8 3#define ISO2022 4#define MACROMAN 5/* states for ISO 2022 A document in ISO-2022 based encoding uses some ESC sequences called "designator" to switch character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646 variants "ESC" + "$" + ? and "ESC" + "$" + "(" + ? for multibyte character sets*/#define FSM_ASCII 0#define FSM_ESC 1#define FSM_ESCD 2#define FSM_ESCDP 3#define FSM_ESCP 4#define FSM_NONASCII 5/* lexer char types */#define digit 1#define letter 2#define namechar 4#define white 8#define newline 16#define lowercase 32#define uppercase 64/* lexer GetToken states */#define LEX_CONTENT 0#define LEX_GT 1#define LEX_ENDTAG 2#define LEX_STARTTAG 3#define LEX_COMMENT 4#define LEX_DOCTYPE 5#define LEX_PROCINSTR 6#define LEX_ENDCOMMENT 7#define LEX_CDATA 8#define LEX_SECTION 9#define LEX_ASP 10#define LEX_JSTE 11#define LEX_PHP 12/* content model shortcut encoding */#define CM_UNKNOWN 0#define CM_EMPTY (1 << 0)#define CM_HTML (1 << 1)#define CM_HEAD (1 << 2)#define CM_BLOCK (1 << 3)#define CM_INLINE (1 << 4)#define CM_LIST (1 << 5)#define CM_DEFLIST (1 << 6)#define CM_TABLE (1 << 7)#define CM_ROWGRP (1 << 8)#define CM_ROW (1 << 9)#define CM_FIELD (1 << 10)#define CM_OBJECT (1 << 11)#define CM_PARAM (1 << 12)#define CM_FRAMES (1 << 13)#define CM_HEADING (1 << 14)#define CM_OPT (1 << 15)#define CM_IMG (1 << 16)#define CM_MIXED (1 << 17)#define CM_NO_INDENT (1 << 18)#define CM_OBSOLETE (1 << 19)#define CM_NEW (1 << 20)#define CM_OMITST (1 << 21)/* Linked list of class names and styles*/struct _style{ char *tag; char *tag_class; char *properties; struct _style *next;};typedef struct _style Style;/* Linked list of style properties*/struct _styleprop{ char *name; char *value; struct _styleprop *next;};typedef struct _styleprop StyleProp;/* mode controlling treatment of doctype */typedef enum{ doctype_omit, doctype_auto, doctype_strict, doctype_loose, doctype_user} DocTypeMode;/* Attribute/Value linked list node*/struct _attval{ struct _attval *next; struct _attribute *dict; struct _node *asp; struct _node *php; int delim; char *attribute; char *value;};typedef struct _attval AttVal;/* node->type is one of these values*/#define RootNode 0#define DocTypeTag 1#define CommentTag 2#define ProcInsTag 3#define TextNode 4#define StartTag 5#define EndTag 6#define StartEndTag 7#define CDATATag 8#define SectionTag 9#define AspTag 10#define JsteTag 11#define PhpTag 12struct _node{ struct _node *parent; struct _node *prev; struct _node *next; struct _node *content; struct _node *last; struct _attval *attributes; char *element; /* name (null for text nodes) */ uint start; /* start of span onto text array */ uint end; /* end of span onto text array */ uint type; /* TextNode, StartTag, EndTag etc. */ Bool closed; /* true if closed by explicit end tag */ Bool implicit; /* true if inferred */ struct _tagdict *was; /* old tag when it was changed */ struct _tagdict *tag; /* tag's dictionary definition */};typedef struct _node Node;/* If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0 Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary tags and attributes then describe it as HTML Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the flavors of Voyager (strict, loose or frameset).*/#define VERS_UNKNOWN 0#define VERS_HTML20 1#define VERS_HTML32 2#define VERS_HTML40_STRICT 4#define VERS_HTML40_LOOSE 8#define VERS_FRAMES 16#define VERS_XML 32#define VERS_NETSCAPE 64#define VERS_MICROSOFT 128#define VERS_SUN 256#define VERS_MALFORMED 512#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_LOOSE (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_IFRAMES (VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_FROM32 (VERS_HTML40_STRICT|VERS_LOOSE)#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)#define VERS_EVERYTHING (VERS_ALL|VERS_PROPRIETARY)/* Mosaic handles inlines via a separate stack from other elements We duplicate this to recover from inline markup errors such as: <i>italic text <p>more italic text</b> normal text which for compatibility with Mosaic is mapped to: <i>italic text</i> <p><i>more italic text</i> normal text Note that any inline end tag pop's the effect of the current inline start tag, so that </b> pop's <i> in the above example.*/struct _inode{ struct _inode *next; struct _tagdict *tag; /* tag's dictionary definition */ char *element; /* name (null for text nodes) */ struct _attval *attributes;};typedef struct _inode IStack;typedef struct _lexer Lexer;/* tidy.c */#define EndOfStream EOF/* non-raw input is cleaned up*/typedef struct{ int state; /* FSM for ISO2022 */ Bool pushed; int c; int tabs; int lastcol; int curcol; int curline; int encoding; FILE *file; Lexer *lexer; /* needed for error reporting */} StreamIn;StreamIn *OpenInput(FILE *fp);int ReadChar(StreamIn *in);void UngetChar(int c, StreamIn *in);/* The following are private to the lexer Use NewLexer(fp) to create a lexer, and FreeLexer(lexer) to free it.*/struct _lexer{ StreamIn *in; /* file stream */ FILE *errout; /* error output stream */ uint badAccess; /* for accessibility errors */ uint badLayout; /* for bad style errors */ uint badChars; /* for bad char encodings */ uint badForm; /* for mismatched/mispositioned form tags */ uint warnings; /* count of warnings in this document */ uint errors; /* count of errors */ uint lines; /* lines seen */ uint columns; /* at start of current token */ Bool waswhite; /* used to collapse contiguous white space */ Bool pushed; /* true after token has been pushed back */ Bool insertspace; /* when space is moved after end tag */ Bool excludeBlocks; /* Netscape compatibility */ Bool exiled; /* true if moved out of table */ Bool isvoyager; /* true if xmlns attribute on html element */ uint versions; /* bit vector of HTML versions */ int doctype; /* version as given by doctype (if any) */ Bool bad_doctype; /* e.g. if html or PUBLIC is missing */ uint txtstart; /* start of current node */ uint txtend; /* end of current node */ uint state; /* state of lexer's finite state machine */ struct _node *token; /* lexer character buffer parse tree nodes span onto this buffer which contains the concatenated text contents of all of the elements. lexsize must be reset for each file. */ char *lexbuf; /* char buffer */ uint lexlength; /* allocated */ uint lexsize; /* used */ /* Inline stack for compatibility with Mosaic */ Node *inode; /* for deferring text node */ IStack *insert; /* for inferring inline tags */ IStack *istack; uint istacklength; /* allocated */ uint istacksize; /* used */ uint istackbase; /* start of frame */ Style *styles; /* used for cleaning up presentation markup */};typedef void (Parser)(Lexer *lexer, Node *node, uint mode);typedef void (CheckAttribs)(Lexer *lexer, Node *node);/* declaration for methods that check attribute values */typedef void (AttrCheck)(Lexer *lexer, Node *node, AttVal *attval);struct _attribute{ struct _attribute *next; char *name; Bool nowrap; unsigned versions; AttrCheck *attrchk;};typedef struct _attribute Attribute;/* well known attributes */extern Attribute *attr_href;extern Attribute *attr_src;extern Attribute *attr_id;extern Attribute *attr_name;extern Attribute *attr_summary;extern Attribute *attr_alt;extern Attribute *attr_longdesc;extern Attribute *attr_title;/* Tag dictionary node*/struct _tagdict{ struct _tagdict *next; char *name; uint versions; uint model; Parser *parser; CheckAttribs *chkattrs;};typedef struct _tagdict Dict;/* modes for GetToken() */#define IgnoreWhitespace 0#define MixedContent 1#define Preformatted 2#define IgnoreMarkup 3void FatalError(char *msg);void FileError(FILE *fp, const char *file);Node *GetToken(Lexer *lexer, uint mode);/* one level unget only */void UngetToken(Lexer *lexer);/* create lexer for a file stream */Lexer *NewLexer(StreamIn *in);/* delete lexer */void FreeLexer(Lexer *lexer);Bool EndOfInput(Lexer *lexer);/* used for script or style */Node *GetCDATA(Lexer *lexer, Node *container);/* use this to create node for inferred start tag */Node *InferredTag(Lexer *lexer, char *name);/* Parser calls this to create RootNode */Node *NewNode(void);AttVal *NewAttribute();void FreeAttrs(Node *node);void FreeAttribute(AttVal *av);/* use this to free parse tree node and all its children */void FreeNode(Node *node);/* used to clone heading nodes when split by an <HR> */Node *CloneNode(Lexer *lexer, Node *element);/* lexer char map - must be initialized */void InitMap(void);void AddCharToLexer(Lexer *lexer, uint c);void AddStringLiteral(Lexer *lexer, char *str);Node *TextToken(Lexer *lexer);/* used by pretty printer for tag names */char FoldCase(char c, Bool tocaps);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -