📄 wp2x.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
    { "center", 0 },    { "CenterHere", 0 },    { "centerhere", 0 },    { "Align", 0 },    { "align", 0 },    { "AlignChar", 1 },    { "FlushRight", 0 },    { "flushright", 0 },    { "Math", 0 },    { "math", 0 },    { "MathCalc", 0 },    { "MathCalcColumn", 0 },    { "SubTotal", 0 },    { "IsSubTotal", 0 },    { "Total", 0 },    { "IsTotal", 0 },    { "GrandTotal", 0 },    { "NegateTotal", 0 },    { "Col", 0 },    { "col", 0 },    { "Fn", 0 },    { "fn", 0 },    { "En", 0 },    { "en", 0 },    { "SetFn#", 1 },    { "FNote#", 0 },    { "ENote#", 0 },    { "Figure#", 0},    { "TableMarker", 0 },    { "Hyph", 0 },    { "hyph", 0 },    { "Just", 0 },    { "just", 0 },    { "Wid", 0 },    { "wid", 0 },    { "HZone", 2 },    { "DAlign", 1 },    { "Header", 0 },    { "header", 0 },    { "Footer", 0 },    { "footer", 0 },    { "Supp", 1 },    { "CtrPg", 0 },    { "SetFont", 2 },    { "SetBin", 1 },    { "PN0", 0 },    { "PN1", 0 },    { "PN2", 0 },    { "PN3", 0 },    { "PN4", 0 },    { "PN5", 0 },    { "PN6", 0 },    { "PN7", 0 },    { "PN8", 0 },    { NULL, 0 },  /* UnsupportedPlaceHolder -- keeps match_identifier happy */    { "set page number column", 0 },    { "extended tabs", 0 },    { "underline mode", 0 },    { "define column", 0 },    { "footnote attributes", 0 },    { "paragraph numbering style", 0 },    { "numbered paragraph", 0 },    { "begin marked text", 0 },    { "end marked text", 0 },    { "define marked text", 0 },    { "define index mark", 0 },    { "define math columns", 0 },    { "WPCorp obsolete", 0 },    { "WPCorp reserved", 0 },    { "Merge Code", 0},    { "WPCorp undefined", 0 },};/* The file pointer "descriptor" points to our descriptor file * and "input" points to our input file. * * Kinda makes sense that way. */FILE *descriptor, *input;/* And the function match_identifier(s) takes a string and converts * it to its corresponding integer.  Or blows up if it couldn't * find one. 
*/int match_identifier(const char *s){  Identifier *I;  /* Maybe it is a special character */  if (s[0] == '\'' && s[2] == '\'' && s[3] == '\0')    return (int) (unsigned char) s[1];  /* Else it must be a multi-character guy */  for (I = names; I->name; I++)    if (!strcmp(I->name, s)) return typeout + (I - names);  /* Otherwise, I don't know what to do with it */  error(descriptor, "Unknown identifier %s", s);  /*NOTREACHED*/  return 0;}/* check_arity ensures that the expansion string is valid */void check_arity(int ident, char *t){  char *s;  int arity = 0;  if (ident > typeout) arity = names[ident-typeout].arity;  for (s = t; *s; s++) {    if (*s != '%') continue;    switch (*++s) {    case '\n':      if (s != t+1)        error(descriptor, "%s: `%%\\n' not at start of expansion",              names[ident-typeout].name);        break;    case '1':    case 'c':      if (arity < 1) goto bad_escape;      break;    case '2':      if (arity < 2) goto bad_escape;      break;    case '%':      break;    default:bad_escape:      error(descriptor, "%s: invalid escape `%%%c'", names[ident-typeout].name, *s);    }  }}/* expand_backslash() is called when a backslash is encountered in * the descriptor file.  Its job is to parse a backslash-sequence. * The usual C-escapes (\a \b \f \n \r \t \v) are understood, as * well as the octal escape \000 [up to three octal digits] and * the hex escape \xFF [up to two hex digits]. */int expand_backslash(FILE *d) {    int c;    switch (c = getc(d)) {	case 'a': c = '\a'; break;	case 'b': c = '\b'; break;	case 'f': c = '\f'; break;	case 'n': c = '\n'; break;	case 'r': c = '\r'; break;	case 't': c = '\t'; break;	case 'v': c = '\v'; break;	case 'x':	case 'X': c = parse_hex(d); break;	case '0':	case '1':	case '2':	case '3':	case '4':	case '5':	case '6':	case '7': c = parse_octal(d, c); break;	default:  /* c = c; */ break;    }    return c;}/* The function read_identifier() attempts to match an identifier * in the descriptor file.  
It returns EOF if the end of the descriptor * file was reached, or the code of the identifier we found. * (or blows up if an error was detected.) * We build the identifier in "s", with the help of our * pool-managing functions above, then discard it, immediately, * since we don't use it any more. */int read_identifier(FILE *d){  register int c;      /* A character we have read */  char *s;    /* The identifier we are building */  int ident;   /* The identifier we found */  /* Skip over comments */  while ((c = next_non_whitespace(d)) == '#')      eat_until_newline(d);  if (c == EOF) return EOF;  /* At this point, "c" contains the first letter of a potential   * identifier.  Let's see what it could possibly be.   */  s = anchor_string();  if (c == '\'') {                      /* a character token */    add_to_string(c);    if ((c = getc(d)) == '\\') c = expand_backslash(d);    add_to_string(c);    if ((c = getc(d)) != '\'')      error(descriptor, "Invalid character identifier");    add_to_string(c);    c = next_non_space_or_tab(d);  } else do {                           /* a name token */    add_to_string(c);    c = next_non_space_or_tab(d);    if (c == '\\') c = expand_backslash(d);  } while (c != EOF && c != '=' && c != '\n');  if (c != '=') error(d, "Identifier not followed by = sign");      /* A boo-boo.  Something bad happened. */  add_to_string('\0');   /* Make it a standard C string. */  finish_string();  ident = match_identifier(s); /* Go find one. */  remove_string(s); /* And we're done with it now. */  return ident;}/* The function grab_expansion() reads expansion text from the * descriptor file and adds it to the pool, returning a pointer * to the string it just created. * * After anchoring a new string, we look for the opening quotation * mark, then start gobbling characters.  Everything gets copied * straight into the string. 
* */char *grab_expansion(FILE *d){  register int c; /* Characters being read */  char *s;   /* The string we are building */  s = anchor_string();  if (next_non_whitespace(d) != '\"')    error(d, "Quotation mark expected");  /* Now read the stream until we hit another quotation mark. */  while ((c = getc(d)) != EOF && c != '\"') {    if (c == '\\') c = expand_backslash(d);    add_to_string(c);  }  add_to_string('\0');  finish_string();  return s;}/* Ah, now with all of these beautiful functions waiting for us, * we can now write our first Useful Function:  do_descriptor_file. * It reads the descriptor file and loads up the "expansion" array * with the text expansions we are reading from the file. * * If we grabbed the expansion of a "typeout", we type it out * and discard the string. * * We stop when the descriptor file runs dry. * */void do_descriptor_file(FILE *d){  register int ident;  while ((ident = read_identifier(d)) != EOF) {    expansion[ident] = grab_expansion(d);    if (ident == typeout && !silent) {      fputs(expansion[typeout], stderr); remove_string(expansion[typeout]);      expansion[typeout] = NULL;    } else check_arity(ident, expansion[ident]);  }}/************************************************************************//* Reading from the input file                                          *//************************************************************************//* The function verify(c) checks that the next character in the input * stream is indeed "c".  It eats the character, if all is well. * If something went wrong, we complain to stderr, but keep going. */void verify(state *st,int c){  int d = getc(st->infile);  if (d != c) fprintf(stderr, "Warning: Expected %02X but received %02X at pos: %ld.\n", c, d,ftell(st->infile));}/* The function gobble(n) simply eats "n" characters from the input * file. 
*/
void gobble(state *st,int n)
{
  /* Counting down with n > 0 makes a zero or negative count a no-op
   * instead of a loop that runs until the counter wraps around.
   */
  for (; n > 0; n--)
    (void) getc(st->infile);
}

int last_HRt = 0;                       /* most recent output was HRt */

/* Processing a special code simply entails dumping its expansion.
 * If the expansion is NULL, then we either
 *   [1] print nothing, if it is a code,
 *   [2] print the character itself, if it is an ASCII character.
 * In both cases the invented expansion is remembered, so the warning
 * about the missing expansion is given only once per code.
 *
 * In dumping its expansion, we expand the following percent-escapes:
 *
 *      %\n  -- newline if previous character was not a newline
 *              (meaningful only as first character in sequence)
 *      %1   -- first parameter, in decimal form
 *      %2   -- second parameter, in decimal form
 *      %c   -- first parameter, in character form
 *      %%   -- literal percent sign
 *
 *  all other %-escapes are flagged as warnings (but should never occur,
 *  since they are trapped at the time the descriptor file is read.)
 *
 * Parameters:
 *      st   -- conversion state; output goes to st->outfile
 *      c    -- code to process; values below 256 are plain characters,
 *              anything else indexes names[] relative to typeout
 *      d1   -- first parameter of the code  (%1 and %c)
 *      d2   -- second parameter of the code (%2)
 */
void process(state *st,int c, int d1, int d2)
{
  char *s;
  static int last_newline = 0;          /* previous expansion ended in \n */

  last_HRt = 0;                         /* the killer switch sets this */

  if (expansion[c] == NULL) {           /* invent a default action */
    if (c < 256) {                      /* single character */
      expansion[c] = anchor_string();   /* emits itself */
      /* A percent sign has to be stored as "%%":  a bare "%" would be
       * read back below as the start of an escape with nothing after
       * it, and the scan would run off the end of the string.
       */
      if (c == '%') add_to_string('%');
      add_to_string(c); add_to_string('\0');
      finish_string();
      if (!silent)
        fprintf(stderr, "Warning: No expansion for %02X (%c)\n", c, c);
    } else {                            /* provide null expansion */
      expansion[c] = "";
      if (!silent)
        fprintf(stderr, "Warning: No expansion for %s\n",
                names[c-typeout].name);
    }
  }

  s = expansion[c];
  if (!*s) return;    /* the rest of the code assumes non-null string */

  do {
    if (*s != '%') {
      putc(*s++, st->outfile);
      continue;
    }

    s++;                                /* skip the percent sign */
    switch (*s++) {
    case '\n':
      if (!last_newline) putc('\n', st->outfile);
      break;
    case '1':
      fprintf(st->outfile, "%d", d1);
      break;
    case '2':
      fprintf(st->outfile, "%d", d2);
      break;
    case 'c':
      putc(d1, st->outfile);
      break;
    case '%':
      putc('%', st->outfile);
      break;
    case '\0':
      /* Lone % at the very end.  Step back onto the terminator so
       * that the loop test below sees it and stops.
       */
      s--;
      fprintf(stderr, "Internal error:  Invalid escape, %% at end of expansion\n");
      break;
    default:
      fprintf(stderr, "Internal error:  Invalid escape, %%%c\n", s[-1]);
      break;
    }
  } while (*s);

  last_newline = s[-1] == '\n';
}

/* unsupported(b, c) is used for codes we recognize but cannot convert.
 * "b" is the raw code value, shown in hex in the warning, and "c" is
 * the identifier whose name is reported.  The first time a code is
 * seen (and we are not silent) a warning goes to stderr; its expansion
 * is then set to "" so that the warning is not repeated.
 *
 * The name of the code is also written into the output, bracketed by
 * the Comment and eComment expansions.  It goes to st->outfile, the
 * same stream process() writes to, so that the name ends up between
 * its comment delimiters (process0 is defined elsewhere and is
 * presumed to output through process() -- confirm).
 */
void unsupported(state *st,int b,int c)
{
  if (!silent && !expansion[c]) {
    expansion[c] = "";
    fprintf(stderr, "Warning: `%s' [%02X] code not supported\n",
            names[c-typeout].name, b);
  }

  process0(st,Comment);
  fputs(names[c-typeout].name, st->outfile);
  process0(st,eComment);
}

/* The function gobble_until(c) eats characters from the input file
 * until it reaches a c or reaches EOF.  The matching character is
 * consumed as well.
 */
void gobble_until(state *st,int c)
{
  int i;

  while ((i = getc(st->infile)) != EOF && (int) (unsigned char) i != c)
    do_nothing;
}

/* line_spacing(l) is called whenever we hit a line-spacing-change command.
 * The argument is the desired line spacing, multiplied by two.
 * So single spacing gets a 2, 1.5 spacing gets a 3, etc.
 * Spacings without a dedicated code are passed on through LS with the
 * raw value as its parameter.
 */
void line_spacing(state *st,int l)
{
  switch (l) {
  case 2:  process0(st,SS);  break;
  case 3:  process0(st,OHS); break;
  case 4:  process0(st,DS);  break;
  case 6:  process0(st,TS);  break;
  default: process1(st,LS, l); break;
  }
}

int environment_status = 0;             /* cleanup at HRt */

/* leave_environment() emits the code stored in environment_status, if
 * there is one, and clears it.  If force_HRt is set and the most recent
 * output was not already an HRt, an HRt is emitted as well.
 */
void leave_environment(state *st,int force_HRt)
{
  if (environment_status) {
    process0(st,environment_status);
    environment_status = 0;
  }

  if (force_HRt && !last_HRt) process0(st,HRt);
}

/* The "note_status" flag has one of three values:
 *    0   if we are not inside a note
 *    1   if we are inside a footnote
 *    2   if we are inside an endnote
 *
 * The function handle_note() is called to deal with footnotes and
 * endnotes.  It adjusts the note_status accordingly.
 */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -