⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wp2x.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
int note_status = 0;void handle_note(state *st){  if (note_status) {    leave_environment(st,1); process0(st,note_status); note_status = 0;  } else {          /* Decide whether it is an endnote or a footnote */    if (getc(st->infile) & 2)  { process0(st,En); note_status = eEn; gobble(st,5); }                else  { process0(st,Fn); note_status = eFn; gobble(st,7); }    verify(st,0xFF);    gobble(st,2);                                  /* margins */  }}/* The tab_table is a bit field.  Each set bit represents a tabstop. * Note, however, that the bits are counted from MSB to LSB. * * The tab_attribute_table is a nybble field.  The n'th nybble represents * the attributes of the n'th tabstop. */unsigned char tab_table[32];unsigned char tab_attribute_table[20];int next_attribute;void process_tab_attribute(state *st,int i) {    int b;    if (next_attribute & 1) b = tab_attribute_table[next_attribute/2] & 3;    else b = (tab_attribute_table[next_attribute/2] / 16) & 3;    next_attribute++;    /* Bottom two bites define what kind of tab.     * Bit 2 is set if we need dot filling.     * Bit 3 is unused.     * We `&3' above because we won't support dot filling.     
*/    process1(st,SetTab + b, i);}void process_tab_table(state *st) {    int i;    next_attribute = 0;    process0(st,BeginTabs);    for (i = 0; i < 32; i++) {        if (tab_table[i] == 0) continue;    /* early out */        if (tab_table[i] & 0x80) process_tab_attribute(st,i * 8 + 0);        if (tab_table[i] & 0x40) process_tab_attribute(st,i * 8 + 1);        if (tab_table[i] & 0x20) process_tab_attribute(st,i * 8 + 2);        if (tab_table[i] & 0x10) process_tab_attribute(st,i * 8 + 3);        if (tab_table[i] & 0x08) process_tab_attribute(st,i * 8 + 4);        if (tab_table[i] & 0x04) process_tab_attribute(st,i * 8 + 5);        if (tab_table[i] & 0x02) process_tab_attribute(st,i * 8 + 6);        if (tab_table[i] & 0x01) process_tab_attribute(st,i * 8 + 7);    }    process0(st,EndTabs);}void handle_tabs(state *st) {    /* pad the tables to force no new tabs, and left tabs everywhere */    memset(tab_table, 0, sizeof(tab_table));    memset(tab_attribute_table, 0, sizeof(tab_attribute_table));    fread(tab_table, 20, 1, input);     /* old-style tabs */    process_tab_table(st);}void handle_extended_tabs(state *st) {    fread(tab_table, 32, 1, input);    fread(tab_attribute_table, 20, 1, input);    process_tab_table(st);}/* The FF_status flag tells us what we should do when we encounter an 0xFF. * It contains the token code of the active code, or 0 if no code is active. 
*/int FF_status = 0;void handle_FF(state *st){    if (FF_status) {                            /* finish header/footer */        leave_environment(st,1);        process0(st,FF_status);        gobble(st,2);        verify(st,0xD1);        FF_status = 0;    } else process0(st,0xFF);}/* * read short */short read_word(FILE *f){  short a;  int   c;  c=getc(f);  a=c;  c=getc(f);  a=(a | c<<8);  return(a);}long read_dword(FILE *f){  long a;  int  c1,c2,c3,c4;  c1=getc(f);  c2=getc(f);  c3=getc(f);  c4=getc(f);  a=(c4 << 24)|(c3<<16)|(c2<<8)|(c1);  return(a);}/* * Safe memory allocator */void *safemalloc(int size){  void *memory;  memory=malloc(size);  if(!memory) {    fprintf(stderr,"Ran out of memory trying to allocate %d bytes\n",size);    abort();  }  return(memory);}/* The function process_token does all of the real work. * Given the first character of a token, we eat up everything * that belongs to that token.  This routine might be called * recursively, since some tokens are defined in terms of other * tokens.  (For example, the subscript code is expanded as *   [Sub] <character being subscripted> [sub] * and the <character being subscripted> might involve other token * expansions; specifically, it might be an IBM Extended character.) * * Luckily, most of our tokens are not recursive.  The macro *     bracket(before, after) * does the recursive stuff for us, bracketing the next token * between expansions of "before" and "after". * */#define bracket(before,after) process0(st,before); process_token(); \                              process0(st,after);int process_token(state *st){  int c = getc(st->infile);  int len;  struct WPToken *wpt;  char smallBuf[256];  struct VariableCode *group;  int stat;  if (c == EOF) return 0;  if (!--blipcount && !silent) {    blipcount = blipinterval;    putc('.', stderr);  }  wpt=&wp_tokens[c];  st->lastToken=wpt;  stat=1;  switch(wpt->wp_type) {  case reserved:    /* ignore it! 
*/    if(!silent) {      fprintf(stderr,"Reserved code [%02x] seen\n",c);    }    break;  case literal:    /* output it */    putc(wpt->wp_intdata,st->outfile);    break;  case singleByte:    assert(wpt->wp_handleFunction!=NULL);    (*wpt->wp_handleFunction)(st,wpt,NULL);    break;  case fixedLength:    smallBuf[0]=c;    if(fread(smallBuf+1,wpt->wp_size-1,1,st->infile)!=1) {      perror("Reading fixedLength");      exit(5);    }    assert(wpt->wp_handleFunction!=NULL);    if(smallBuf[wpt->wp_size-1]!=wpt->wp_code) {      fprintf(stderr,"Fixed Length block [%02x] incorrectly terminated by [%02x] at pos %ld\n",	      wpt->wp_code,smallBuf[wpt->wp_size-1],ftell(st->infile));    } else {      stat=(*wpt->wp_handleFunction)(st,wpt,smallBuf);    }    break;  case varLength:    /* get subfunction */    c=getc(input);    len=read_word(input);    group=(struct VariableCode *)safemalloc(sizeof(struct VariableCode)+len);    if(fread(group->data,len,1,input)!=1) {      perror("reading variable group");      exit(5);    }    group->func=wpt->wp_code;    group->subFunc=c;    group->len=len;    assert(wpt->wp_handleFunction!=NULL);    stat=(*wpt->wp_handleFunction)(st,wpt,(void *)group);    free(group);    break;  }  return stat;}/* Now do the other Useful Function. 
*/int process_input(state *st){  process0(st,BEGIN);  while(process_token(st)) do_nothing;  process0(st,END);  return 1;}int wptoh(int wpl){  char *p,*q;  int  result;  /* convert to network byte order */  /* WP is Intel byte order        */  p=(char *)(&wpl);  q=(char *)(&result);  q[0]=p[3];  q[1]=p[2];  q[2]=p[1];  q[3]=p[0];  return(htonl(result));}static char *WPproducts[]={  "none",  "WordPerfect",  "Shell",  "Notebook",  "Calculator",  "File Manager",  "Calendar",  "Program Editor/Ed Editor",  "Macro Editor",  "Plan Perfect",  "DataPerfect",  "Mail",  "Printer",  "Scheduler",  "WordPerfect Office",  "DrawPerfect",  "LetterPerfect",};#define MAX_WP_PRODUCT 16int process_preamble(state *st){  int c;  char prefix[16];  int  documentAreaOffset;  c=getc(st->infile);  if(c!=255) {    /* not a WP 5 file! */    return 0;  }  prefix[0]=c;  if(fread(prefix+1,15,1,st->infile)!=1) {    perror("No prefix block");    return 0;  }  /* check for WP Corp token */  if(prefix[1]!='W' || prefix[2]!='P' || prefix[3]!='C') {    fprintf(stderr,"Not a recognized file type: %02x %02x %02x\n",	    prefix[1],prefix[2],prefix[3]);    return 0;  }  documentAreaOffset=*((int *)&prefix[4]);  documentAreaOffset=wptoh(documentAreaOffset);  fseek(st->infile,documentAreaOffset,SEEK_SET);  fprintf(stderr,"Document created with %s %d.%d\nDocument starts at: %d [%4x]\n",	  (prefix[8]>MAX_WP_PRODUCT) ? "unknown" : WPproducts[prefix[8]],	  (prefix[10]==0 ? 5 : prefix[10]),	  prefix[11],documentAreaOffset,documentAreaOffset);  return 1;}/************************************************************************//* The main program                                                     *//************************************************************************//* First, a pretty little function which tries to open a file and * complains loudly if it cannot. 
*/FILE *efopen(const char *s, const char *m){  FILE *fp = fopen(s, m);  if (fp == NULL) {    fprintf(stderr, "Error: Cannot open %s", s);    fprintf(stderr, " (%s)\n", s, strerror(errno));    fprintf(stderr, "\n");    exit(1);  }  return fp;}/* Our main program does very little, really. * * After checking the command line, it proceeds to open the descriptor * file in text mode, and the input file in binary mode. * It then calls our two Useful Functions in turn, closing each file * after it has served its purpose. */int Cdecl main(int argc, char **argv){  extern char *optarg;  extern int optind;  int    errflg=0;  int    c;  char   *descriptorFileName;  state  masterState;  extern FILE   *dopen(char *);  while((c=getopt(argc,argv,"sn:v"))!=-1) {    switch (c) {    case 's':      silent = 1;      break;    case 'n':      blipinterval = atoi(optarg);      break;    case 'v':      fprintf(stderr,"wp2x version " VERSION " patchlevel %d\n",PATCHLEVEL);      errflg++;      break;    default:      errflg++;      break;    }  }  if (errflg) {usage:    fprintf(stderr, "usage: wp2x [-v] [-s][-n X] descriptor input > output\n\t -v for version number\n");    exit(2);  }  blipcount = blipinterval;  if((argc - optind)<2) {    goto usage;  }  descriptorFileName=argv[optind];  fprintf(stderr,"Reading config file: %s\n",descriptorFileName);  descriptor=dopen(descriptorFileName);  do_descriptor_file(descriptor);  fclose(descriptor);  for(optind++; optind < argc; optind++) {    input = efopen(argv[optind], "rb");    masterState.infile=input;    masterState.outfile=stdout;    if(!process_preamble(&masterState)) {      exit(10);    }    if(!process_input(&masterState)) {      exit(11);    }    fclose(input);  }  return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -