📄 tidy.c
字号:
return no;}/* For mac users, should we map Unicode back to MacRoman? */void outc(uint c, Out *out){ uint ch; if (out->encoding == UTF8) { if (c < 128) putc(c, out->fp); else if (c <= 0x7FF) { ch = (0xC0 | (c >> 6)); putc(ch, out->fp); ch = (0x80 | (c & 0x3F)); putc(ch, out->fp); } else if (c <= 0xFFFF) { ch = (0xE0 | (c >> 12)); putc(ch, out->fp); ch = (0x80 | ((c >> 6) & 0x3F)); putc(ch, out->fp); ch = (0x80 | (c & 0x3F)); putc(ch, out->fp); } else if (c <= 0x1FFFFF) { ch = (0xF0 | (c >> 18)); putc(ch, out->fp); ch = (0x80 | ((c >> 12) & 0x3F)); putc(ch, out->fp); ch = (0x80 | ((c >> 6) & 0x3F)); putc(ch, out->fp); ch = (0x80 | (c & 0x3F)); putc(ch, out->fp); } else { ch = (0xF8 | (c >> 24)); putc(ch, out->fp); ch = (0x80 | ((c >> 18) & 0x3F)); putc(ch, out->fp); ch = (0x80 | ((c >> 12) & 0x3F)); putc(ch, out->fp); ch = (0x80 | ((c >> 6) & 0x3F)); putc(ch, out->fp); ch = (0x80 | (c & 0x3F)); putc(ch, out->fp); } } else if (out->encoding == ISO2022) { if (c == 0x1b) /* ESC */ out->state = FSM_ESC; else { switch (out->state) { case FSM_ESC: if (c == '$') out->state = FSM_ESCD; else if (c == '(') out->state = FSM_ESCP; else out->state = FSM_ASCII; break; case FSM_ESCD: if (c == '(') out->state = FSM_ESCDP; else out->state = FSM_NONASCII; break; case FSM_ESCDP: out->state = FSM_NONASCII; break; case FSM_ESCP: out->state = FSM_ASCII; break; case FSM_NONASCII: c &= 0x7F; break; } } putc(c, out->fp); } else putc(c, out->fp);}/* first time initialization which should precede reading the command line*/void InitTidy(void){ InitMap(); InitAttrs(); InitTags(); InitEntities(); InitConfig(); totalerrors = totalwarnings = 0; XmlTags = XmlOut = HideEndTags = UpperCaseTags = MakeClean = writeback = OnlyErrors = no; errfile = null; errout = stderr;#ifdef CONFIG_FILE ParseConfigFile(CONFIG_FILE);#endif}/* call this when you have finished with tidy to free the hash tables and other resources*/void DeInitTidy(void){ FreeTags(); FreeAttrTable(); FreeEntities(); FreeConfig(); FreePrintBuf();}int main(int argc, char **argv){ char *file, *prog; FILE *fp = null; Node *document; Lexer *lexer; char *s, c, *arg, *current_errorfile = "stderr"; int jmpret; Out out; /* normal output stream */#if PRESERVEFILETIMES struct utimbuf filetimes; struct stat sbuf;#endif Bool haveFileTimes; /* set up for long jump back to here on severe errors */ jmpret = setjmp(error_exit); /* return on a severe error after long jump */ if (jmpret != 0) { /* ensure input is closed */ if (fp && fp != stdin) fclose(fp); /* 2 signifies a serious error */ return 2; } InitTidy(); /* look for env var "HTML_TIDY" */ /* then for ~/.tidyrc (on Unix) */ if ((file = getenv("HTML_TIDY"))) ParseConfigFile(file);#ifdef SUPPORT_GETPWNAM else ParseConfigFile("~/.tidyrc");#endif /* SUPPORT_GETPWNAM */ /* read command line */ prog = argv[0]; while (argc > 0) { if (argc > 1 && argv[1][0] == '-') { /* support -foo and --foo */ arg = argv[1] + 1;#if 0 if (arg[0] == '-') ++arg;#endif if (strcmp(arg, "indent") == 0) IndentContent = yes; else if (strcmp(arg, "4mz") == 0) { ForMZ = yes; xHTML = yes; } else if (strcmp(arg, "xml") == 0) XmlTags = yes; else if (strcmp(arg, "asxml") == 0 || strcmp(arg, "asxhtml") == 0) xHTML = yes; else if (strcmp(arg, "indent") == 0) { IndentContent = yes; SmartIndent = yes; } else if (strcmp(arg, "omit") == 0) HideEndTags = yes; else if (strcmp(arg, "upper") == 0) UpperCaseTags = yes; else if (strcmp(arg, "clean") == 0) MakeClean = yes; else if (strcmp(arg, "raw") == 0) CharEncoding = RAW; else if (strcmp(arg, "ascii") == 0) CharEncoding = ASCII; else if (strcmp(arg, "latin1") == 0) CharEncoding = LATIN1; else if (strcmp(arg, "utf8") == 0) CharEncoding = UTF8; else if (strcmp(arg, "iso2022") == 0) CharEncoding = ISO2022; else if (strcmp(arg, "mac") == 0) CharEncoding = MACROMAN; else if (strcmp(arg, "numeric") == 0) NumEntities = yes; else if (strcmp(arg, "modify") == 0) writeback = yes; else if (strcmp(arg, "change") == 0) /* obsolete */ writeback = yes; else if (strcmp(arg, "update") == 0) /* obsolete */ writeback = yes; else if (strcmp(arg, "errors") == 0) OnlyErrors = yes; else if (strcmp(arg, "quiet") == 0) Quiet = yes; else if (strcmp(arg, "slides") == 0) BurstSlides = yes; else if (strcmp(arg, "help") == 0 || argv[1][1] == '?'|| argv[1][1] == 'h') { HelpText(stdout, prog); return 1; } else if (strcmp(arg, "config") == 0) { if (argc >= 3) { ParseConfigFile(argv[2]); --argc; ++argv; } } else if (strcmp(argv[1], "-file") == 0 || strcmp(argv[1], "--file") == 0 || strcmp(argv[1], "-f") == 0) { if (argc >= 3) { /* create copy that can be freed by FreeConfig() */ errfile = wstrdup(argv[2]); --argc; ++argv; } } else if (strcmp(argv[1], "-wrap") == 0 || strcmp(argv[1], "--wrap") == 0 || strcmp(argv[1], "-w") == 0) { if (argc >= 3) { sscanf(argv[2], "%d", &wraplen); --argc; ++argv; } } else if (strcmp(argv[1], "-version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) { ShowVersion(errout); /* called to free hash tables etc. */ DeInitTidy(); return 0; } else if(strncmp(argv[1],"--",2)==0) { if (ParseConfig(argv[1]+2, argv[2])) { ++argv; --argc; } } else { s = argv[1]; while ((c = *++s)) { if (c == 'i') { IndentContent = yes; SmartIndent = yes; } else if (c == 'o') HideEndTags = yes; else if (c == 'u') UpperCaseTags = yes; else if (c == 'c') MakeClean = yes; else if (c == 'n') NumEntities = yes; else if (c == 'm') writeback = yes; else if (c == 'e') OnlyErrors = yes; else if (c == 'q') Quiet = yes; else UnknownOption(stderr, c); } } --argc; ++argv; continue; } /* ensure config is self-consistent */ AdjustConfig(); /* user specified error file */ if (errfile) { /* is it same as the currently opened file? */ if (wstrcmp(errfile, current_errorfile) != 0) { /* no so close previous error file */ if (errout != stderr) fclose(errout); /* and try to open the new error file */ fp = fopen(errfile, "w"); if (fp != null) { errout = fp; current_errorfile = errfile; } else /* can't be opened so fall back to stderr */ { errout = stderr; current_errorfile = "stderr"; } } } haveFileTimes = no; if (argc > 1) { file = argv[1]; fp = fopen(file, "r");#if PRESERVEFILETIMES /* get last modified time */ if (KeepFileTimes && fp && fstat(fileno(fp), &sbuf) != -1) { filetimes.actime = sbuf.st_atime; filetimes.modtime = sbuf.st_mtime; haveFileTimes = yes; }#endif } else { fp = stdin; file = "stdin"; } if (fp != null) { lexer = NewLexer(OpenInput(fp)); lexer->errout = errout; /* store pointer to lexer in input stream to allow character encoding errors to be reported */ lexer->in->lexer = lexer; /* Tidy doesn't alter the doctype for generic XML docs */ if (XmlTags) document = ParseXMLDocument(lexer); else { lexer->warnings = 0; if (!Quiet) HelloMessage(errout, release_date, file); document = ParseDocument(lexer); if (!CheckNodeIntegrity(document)) { fprintf(stderr, "\nPanic - tree has lost its integrity\n"); exit(1); } /* cleans up <dir>indented text</dir> etc. */ List2BQ(document); BQ2Div(document); /* replaces i by em and b by strong */ if (LogicalEmphasis) EmFromI(document); if (Word2000 && IsWord2000(document)) { /* prune Word2000's <![if ...]> ... <![endif]> */ DropSections(lexer, document); /* drop style & class attributes and empty p, span elements */ CleanWord2000(document); } /* replaces presentational markup by style rules */ if (MakeClean || DropFontTags) CleanTree(lexer, document); if (!CheckNodeIntegrity(document)) { fprintf(stderr, "\nPanic - tree has lost its integrity\n"); exit(1); } if (document->content) { if (xHTML) SetXHTMLDocType(lexer, document); else FixDocType(lexer, document); if (TidyMark) AddGenerator(lexer, document); } /* ensure presence of initial <?XML version="1.0"?> */ if (XmlOut && XmlPi) FixXMLPI(lexer, document); totalwarnings += lexer->warnings; totalerrors += lexer->errors; if (!Quiet && document->content) { ReportVersion(errout, file, HTMLVersionName(lexer)); ReportNumWarnings(errout, lexer); } } if (fp != stdin) { fclose(fp); } MemFree(lexer->in); if (lexer->errors > 0) NeedsAuthorIntervention(errout); out.state = FSM_ASCII; out.encoding = CharEncoding; if (!OnlyErrors && lexer->errors == 0) { if (BurstSlides) { Node *body, *doctype; /* remove doctype to avoid potential clash with markup introduced when bursting into slides */ /* discard the document type */ doctype = FindDocType(document); if (doctype) DiscardElement(doctype); /* slides use transitional features */ lexer->versions |= VERS_HTML40_LOOSE; /* and patch up doctype to match */ if (xHTML) SetXHTMLDocType(lexer, document); else FixDocType(lexer, document); /* find the body element which may be implicit */ body = FindBody(document); if (body) { ReportNumberOfSlides(errout, CountSlides(body)); CreateSlides(lexer, document); } else MissingBody(errout); } else if (writeback && (fp = fopen(file, "w"))) { out.fp = fp; if (XmlTags) PPrintXMLTree(&out, null, 0, lexer, document); else PPrintTree(&out, null, 0, lexer, document); PFlushLine(&out, 0);#if PRESERVEFILETIMES /* set file last accessed/modified times to original values */ if (haveFileTimes) futime(fileno(fp), &filetimes);#endif fclose(fp); } else { out.fp = stdout; if (XmlTags) PPrintXMLTree(&out, null, 0, lexer, document); else PPrintTree(&out, null, 0, lexer, document); PFlushLine(&out, 0); } } ErrorSummary(lexer); FreeNode(document); FreeLexer(lexer); } else UnknownFile(errout, prog, file); --argc; ++argv; if (argc <= 1) break; } if (totalerrors + totalwarnings > 0) GeneralInfo(errout); if (errout != stderr) fclose(errout); /* called to free hash tables etc. */ DeInitTidy(); /* return status can be used by scripts */ if (totalerrors > 0) return 2; if (totalwarnings > 0) return 1; /* 0 signifies all is ok */ return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -