html.h

来自「我搜集到的一个java常用类库的源代码」· C头文件代码 · 共 823 行 · 第 1/2 页
823 行
/* html.h  (c) 1998 (W3C) MIT, INRIA, Keio University  See tidy.c for the copyright notice.*//* indentation modes */#define NO_INDENT      0#define BLOCK_INDENT   1#define SMART_INDENT   2/* character encodings */#define RAW         0#define ASCII       1#define LATIN1      2#define UTF8        3#define ISO2022     4#define MACROMAN    5/* states for ISO 2022  A document in ISO-2022 based encoding uses some ESC sequences called  "designator" to switch character sets. The designators defined and  used in ISO-2022-JP are:    "ESC" + "(" + ?     for ISO646 variants    "ESC" + "$" + ?     and    "ESC" + "$" + "(" + ?   for multibyte character sets*/#define FSM_ASCII   0#define FSM_ESC     1#define FSM_ESCD    2#define FSM_ESCDP   3#define FSM_ESCP    4#define FSM_NONASCII 5/* lexer char types */#define digit       1#define letter      2#define namechar    4#define white       8#define newline     16#define lowercase   32#define uppercase   64/* lexer GetToken states */#define LEX_CONTENT     0#define LEX_GT          1#define LEX_ENDTAG      2#define LEX_STARTTAG    3#define LEX_COMMENT     4#define LEX_DOCTYPE     5#define LEX_PROCINSTR   6#define LEX_ENDCOMMENT  7#define LEX_CDATA       8#define LEX_SECTION     9#define LEX_ASP         10#define LEX_JSTE        11#define LEX_PHP         12/* content model shortcut encoding */#define CM_UNKNOWN         0#define CM_EMPTY        (1 << 0)#define CM_HTML         (1 << 1)#define CM_HEAD         (1 << 2)#define CM_BLOCK        (1 << 3)#define CM_INLINE       (1 << 4)#define CM_LIST         (1 << 5)#define CM_DEFLIST      (1 << 6)#define CM_TABLE        (1 << 7)#define CM_ROWGRP       (1 << 8)#define CM_ROW          (1 << 9)#define CM_FIELD        (1 << 10)#define CM_OBJECT       (1 << 11)#define CM_PARAM        (1 << 12)#define CM_FRAMES       (1 << 13)#define CM_HEADING      (1 << 14)#define CM_OPT          (1 << 15)#define CM_IMG          (1 << 16)#define CM_MIXED        (1 << 17)#define CM_NO_INDENT    (1 << 18)#define 
CM_OBSOLETE     (1 << 19)#define CM_NEW          (1 << 20)#define CM_OMITST       (1 << 21)/* Linked list of class names and styles*/struct _style{    char *tag;    char *tag_class;    char *properties;    struct _style *next;};typedef struct _style Style;/* Linked list of style properties*/struct _styleprop{    char *name;    char *value;    struct _styleprop *next;};typedef struct _styleprop StyleProp;/* mode controlling treatment of doctype */typedef enum{    doctype_omit,    doctype_auto,    doctype_strict,    doctype_loose,    doctype_user} DocTypeMode;/* Attribute/Value linked list node*/struct _attval{    struct _attval *next;    struct _attribute *dict;    struct _node *asp;    struct _node *php;    int delim;    char *attribute;    char *value;};typedef struct _attval AttVal;/*  node->type is one of these values*/#define RootNode        0#define DocTypeTag      1#define CommentTag      2#define ProcInsTag      3#define TextNode        4#define StartTag        5#define EndTag          6#define StartEndTag     7#define CDATATag        8#define SectionTag      9#define AspTag          10#define JsteTag         11#define PhpTag          12struct _node{    struct _node *parent;    struct _node *prev;    struct _node *next;    struct _node *content;    struct _node *last;    struct _attval *attributes;    char *element;          /* name (null for text nodes) */    uint start;             /* start of span onto text array */    uint end;               /* end of span onto text array */    uint type;              /* TextNode, StartTag, EndTag etc. */    Bool closed;            /* true if closed by explicit end tag */    Bool implicit;          /* true if inferred */    struct _tagdict *was;   /* old tag when it was changed */    struct _tagdict *tag;   /* tag's dictionary definition */};typedef struct _node Node;/* If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0 Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. 
If there are proprietary tags and attributes then describe it as HTML Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the flavors of Voyager (strict, loose or frameset).*/#define VERS_UNKNOWN       0#define VERS_HTML20        1#define VERS_HTML32        2#define VERS_HTML40_STRICT 4#define VERS_HTML40_LOOSE  8#define VERS_FRAMES       16#define VERS_XML          32#define VERS_NETSCAPE     64#define VERS_MICROSOFT   128#define VERS_SUN         256#define VERS_MALFORMED   512#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_LOOSE (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_IFRAMES (VERS_HTML40_LOOSE|VERS_FRAMES)#define VERS_FROM32  (VERS_HTML40_STRICT|VERS_LOOSE)#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)#define VERS_EVERYTHING (VERS_ALL|VERS_PROPRIETARY)/*  Mosaic handles inlines via a separate stack from other elements  We duplicate this to recover from inline markup errors such as:     <i>italic text     <p>more italic text</b> normal text  which for compatibility with Mosaic is mapped to:     <i>italic text</i>     <p><i>more italic text</i> normal text  Note that any inline end tag pop's the effect of the current  inline start tag, so that </b> pop's <i> in the above example.*/struct _inode{    struct _inode *next;    struct _tagdict *tag;   /* tag's dictionary definition */    char *element;          /* name (null for text nodes) */    struct _attval *attributes;};typedef struct _inode IStack;typedef struct _lexer Lexer;/* tidy.c */#define EndOfStream EOF/* non-raw input is cleaned up*/typedef struct{    int state;     /* FSM for ISO2022 */    Bool pushed;    int c;    int tabs;    int lastcol;    int curcol;    int curline;    int encoding;    FILE *file;    Lexer *lexer;  /* needed for error reporting */} StreamIn;StreamIn 
*OpenInput(FILE *fp);int ReadChar(StreamIn *in);void UngetChar(int c, StreamIn *in);/*  The following are private to the lexer  Use NewLexer(fp) to create a lexer, and  FreeLexer(lexer) to free it.*/struct _lexer{    StreamIn *in;   /* file stream */    FILE *errout;   /* error output stream */    uint badAccess; /* for accessibility errors */    uint badLayout; /* for bad style errors */    uint badChars;  /* for bad char encodings */    uint badForm;   /* for mismatched/mispositioned form tags */    uint warnings;  /* count of warnings in this document */    uint errors;    /* count of errors */    uint lines;     /* lines seen */    uint columns;   /* at start of current token */    Bool waswhite;  /* used to collapse contiguous white space */    Bool pushed;    /* true after token has been pushed back */    Bool insertspace;   /* when space is moved after end tag */    Bool excludeBlocks;  /* Netscape compatibility */    Bool exiled;    /* true if moved out of table */    Bool isvoyager; /* true if xmlns attribute on html element */    uint versions;  /* bit vector of HTML versions */    int doctype;    /* version as given by doctype (if any) */    Bool bad_doctype; /* e.g. if html or PUBLIC is missing */    uint txtstart;  /* start of current node */    uint txtend;    /* end of current node */    uint state;     /* state of lexer's finite state machine */    struct _node *token;    /*       lexer character buffer      parse tree nodes span onto this buffer      which contains the concatenated text      contents of all of the elements.     lexsize must be reset for each file.    
*/    char *lexbuf;     /* char buffer */    uint lexlength;   /* allocated */    uint lexsize;     /* used */    /* Inline stack for compatibility with Mosaic */    Node *inode;        /* for deferring text node */    IStack *insert;     /* for inferring inline tags */    IStack *istack;    uint istacklength;  /* allocated */    uint istacksize;    /* used */    uint istackbase;    /* start of frame */    Style *styles;      /* used for cleaning up presentation markup */};typedef void (Parser)(Lexer *lexer, Node *node, uint mode);typedef void (CheckAttribs)(Lexer *lexer, Node *node);/* declaration for methods that check attribute values */typedef void (AttrCheck)(Lexer *lexer, Node *node, AttVal *attval);struct _attribute{    struct _attribute *next;    char *name;    Bool nowrap;    unsigned versions;    AttrCheck *attrchk;};typedef struct _attribute Attribute;/* well known attributes */extern Attribute *attr_href;extern Attribute *attr_src;extern Attribute *attr_id;extern Attribute *attr_name;extern Attribute *attr_summary;extern Attribute *attr_alt;extern Attribute *attr_longdesc;extern Attribute *attr_title;/* Tag dictionary node*/struct _tagdict{    struct _tagdict *next;    char *name;    uint versions;    uint model;    Parser *parser;    CheckAttribs *chkattrs;};typedef struct _tagdict Dict;/* modes for GetToken() */#define IgnoreWhitespace    0#define MixedContent        1#define Preformatted        2#define IgnoreMarkup        3void FatalError(char *msg);void FileError(FILE *fp, const char *file);Node *GetToken(Lexer *lexer, uint mode);/* one level unget only */void UngetToken(Lexer *lexer);/* create lexer for a file stream */Lexer *NewLexer(StreamIn *in);/* delete lexer */void FreeLexer(Lexer *lexer);Bool EndOfInput(Lexer *lexer);/* used for script or style */Node *GetCDATA(Lexer *lexer, Node *container);/* use this to create node for inferred start tag */Node *InferredTag(Lexer *lexer, char *name);/* Parser calls this to create RootNode */Node 
*NewNode(void);AttVal *NewAttribute();void FreeAttrs(Node *node);void FreeAttribute(AttVal *av);/* use this to free parse tree node and all its children */void FreeNode(Node *node);/* used to clone heading nodes when split by an <HR> */Node *CloneNode(Lexer *lexer, Node *element);/* lexer char map - must be initialized */void InitMap(void);void AddCharToLexer(Lexer *lexer, uint c);void AddStringLiteral(Lexer *lexer, char *str);Node *TextToken(Lexer *lexer);/* used by pretty printer for tag names */char FoldCase(char c, Bool tocaps);
html.h - 源码说明

本页面展示了「我搜集到的一个java常用类库的源代码」中的 html.h 源码文件，该文件是 C 语言头文件，共 823 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?