📄 wordtest.w

📁 著名算法大师高爷爷设计的语言。此语言结合了Tex和C
💻 W
📖 第 1 页 / 共 2 页
字号:
上一页 12
  }}*q=*qq=NULL;@ We allocate node memory dynamically, in blocks of 100 nodes at a time.We also allocate string memory dynamically, 1000 characters at once(in addition to space for the current string).The variable |l| will be set to the length of the word in |buffer|.@d NODES_PER_BLOCK 100@d CHARS_PER_BLOCK 1000@d out_of_mem(x) {@+fprintf(stderr,"%s: Memory exhausted!\n",*argv);                    return x;@+}@<Set |r| to the address of a new node, and move |buffer| into it@>=if (next_node==bad_node) {  next_node=(node*)calloc(NODES_PER_BLOCK,sizeof(node));  if (next_node==NULL) out_of_mem(-2);  bad_node=next_node+NODES_PER_BLOCK;}r=next_node++;@<Move |buffer| to a new place in the string memory, and make  |r->keyword| point to it@>;@ @<Move |buffer| to a new place...@>=if (next_string+l+1>=bad_string) {@+int block_size=CHARS_PER_BLOCK+l+1;  next_string=(byte*)malloc(block_size);  if (next_string==NULL) out_of_mem(-3);  bad_string=next_string+block_size;}r->keyword=next_string;for (u=buffer,v=next_string;ord[*u]>0;u++,v++) *v=*u;*v='\0';next_string=v+1;@ We had better define the variables we've been assuming in thesestorage allocation routines.@<Local variables@>=node *next_node=NULL, *bad_node=NULL;byte *next_string=NULL, *bad_string=NULL;node *root=NULL;byte *buffer;int l; /* length of current string in |buffer| */@ The mechanisms for sorting the input words are now all in place.We merely need to invoke them at the right times.@<Sort the input into memory@>=buffer=(byte*)malloc(max_length+1);if (buffer==NULL) out_of_mem(-5);while (1) {  @<Set |buffer| to the next word from |stdin|; |goto done| if file ends@>;  if (l) {   @<Search for |buffer| in the treap; |goto found| if it's there@>;   @<Insert the |buffer| word into the treap@>; found:;      }}done:;@ @<Set |buffer| to the next word from |stdin|...@>=u=buffer;@+l=0;while (l<max_length) {  c=getchar();  if (c==EOF) {    if (ferror(stdin)) {      fprintf(stderr,"%s: File read error on standard input!\n",*argv);      return -6;    }    goto done; /* end of file; the current word, if any, is discarded */  }  if (ord[c]<=0) {    if (ord[c]<0) break;  } else {    *u++=(byte)c;    l++;  }}*u=breakchar;@ At the end we want to traverse the treap in symmetric order, so thatwe see its words in alphabetic order. We might as well destroy thetreap structure as we do this. During this phase, |root| will pointto a stack of nodes that remain to be visited (followed by traversalof their right subtrees).@<Output all input words that aren't in dictionaries@>=if (root!=NULL) {@+register node *p,*q;  p=root;  root=NULL;  while (1) {    while (p->left!=NULL) {      q=p->left;      p->left=root; /* |left| links are now used for the stack */      root=p;      p=q;    }visit: @<Output |p->keyword|, if it's not in the dictionaries@>;    if (p->right==NULL) {      if (root==NULL) break; /* the stack is empty, we're done */      p=root;      root=root->left; /* pop the stack */      goto visit;    } else p=p->right;  }}@* The dictionaries. So now all we have to do is provide a mechanismfor reading the words in the dictionaries. The dictionaries are sorted,and by now the input words have been sorted too.So we need only scan through thedictionaries once; we'll try to zoom through as quickly as possible.First we need data structures. There will be an array of pointers to filenodes,for all dictionary files currently open. Each filenode will containa buffer of size |BUFSIZ+1| for raw input bytes not yet scanned,as well as a buffer of size |MAX_LENGTH_LIMIT+1| for the current wordbeing considered.@<Typedefs@>=typedef struct filenode_struct {  struct filenode_struct *link; /* pointer to next open file */  FILE *dfile; /* dictionary file */  byte buf[BUFSIZ+1], curword[MAX_LENGTH_LIMIT+1];  byte *pos; /* current position in |buf| */  byte *limit; /* end of input bytes in |buf| */  byte *endword; /* the first break character in |curword| */} filenode;@ @<Allocate data structures...@>=if (targc) {  curfile=(filenode*)calloc(targc,sizeof(filenode));  if (curfile==NULL) out_of_mem(-7);  for (f=curfile;f<curfile+targc-1;f++) f->link=f+1;  f->link=curfile; /* circular linking */} else curfile=NULL;@ @<Local...@>=filenode *curfile; /* current filenode of interest */filenode *f; /* temporary register for filenode list processing */@ @<Open the dictionary file named |*targv|@>={  curfile->dfile=fopen((char*)*targv,"r");  if (curfile->dfile==NULL) {    fprintf(stderr,"%s: Can't open dictionary file %s!\n",*argv,(char*)*targv);    return -8;  }  curfile->pos=curfile->limit=curfile->buf; /* |buf| is empty */  curfile->buf[0]='\0';  curfile->endword=curfile->curword; /* |curword| is empty too */  curfile->curword[0]=breakchar;  curfile=curfile->link; /* move to next filenode */}@ We will implicitly merge the dictionaries together by using a brute forcescheme that works fine when there are only a few of them. Namely,|curfile| will point to a file having the currently smallestcurrent word. To get to the next word of the merge, we advance to thenext word in that file, comparing it with the current words of theother files to see if |curfile| should switch to one of them.When we get to the end of a file, its filenode simply leaves the circularlist. Eventually the list will be empty, and we will set |curfile| to|NULL|; we will then have seen all the dictionary words in order.@ @<Output |p->keyword|, if it's not in the dictionaries@>=while (curfile!=NULL) {  for (u=p->keyword,v=curfile->curword;ord[*u]==ord[*v];u++,v++) ;  if (*u=='\0' && *v==breakchar) goto word_done;    /* we found it in the dictionary */  if (ord[*u]<ord[*v]) break; /* we didn't find it */  @<Advance to the next dictionary word@>;}@<Print |p->keyword| and |breakchar| on |stdout|@>@;word_done:;@ @<Print |p->keyword| and |breakchar| on |stdout|@>=for (u=p->keyword;*u;u++) putchar(*u);putchar(breakchar);@ @<Advance...@>=@<Read a new word into |curfile->curword|, as fast as you can@>;@<Adjust |curfile|, if necessary, to point to a file with minimal  |curword|@>;@ The dictionaries are supposed to be in order, and they shouldn'tcontain nulls. But if they fail to meet these criteria, we don't want{\tt wordtest} to crash; it should just run more slowly and/or morepeculiarly.The logic of the code here removes null characters, at the cost of speed.If the dictionary contains words out of order, say $\alpha>\beta$ where$\alpha$ precedes $\beta$ in the file, the effect will be as if $\beta$were not present. (In particular, if the dictionary would happen to have a nullword because of a break character inserted by our |max_length| logic,that null word would cause no harm, because a null word is always less thanany nonnull word.)A null character always appears in |curfile->limit|.@<Read a new word into |curfile->curword|...@>=v=curfile->curword;l=max_length; /* here |l| represents max characters to put in |curword| */while (1) {@+register byte *w=curfile->limit;  u=curfile->pos;  if (u+l>=w)    while (ord[*u]>0) *v++=*u++; /* this is the inner loop */  else {    w=u+l;    c=*w;    *w='\0'; /* temporarily store a null to avoid overlong string */    while (ord[*u]>0) *v++=*u++; /* this too is the inner loop */    *w=c; /* restore the damaged byte */  }  if (ord[*u]<0) {    curfile->pos=u+1; /* good, we found the next break character */    break;  }  l-=u-curfile->pos;  if (l==0) { /* |max_length| reached */    curfile->pos=u;    break;  }  if (u==w) { /* we're at |curfile->limit| */    @<Refill |curfile->buf|; or remove the current file from the      circular list and |goto update_done|, if it has ended@>;  } else curfile->pos=u+1; /* bypass a null character in the dictionary */}curfile->endword=v;*v=breakchar;update_done:;@ @<Refill |curfile->buf|...@>=if (ferror(curfile->dfile)) {  fprintf(stderr,"%s: File read error on dictionary file!\n",*argv);  return -9;}if (feof(curfile->dfile)) {  f=curfile->link;  if (f==curfile) curfile=NULL; /* the last dictionary file has ended */  else {    while (f->link!=curfile) f=f->link;    f->link=curfile->link; /* remove a filenode from the circular list */    curfile=f; /* and point to one of the remaining filenodes */  }  goto update_done;}curfile->limit=curfile->buf+fread(curfile->buf,1,BUFSIZ,curfile->dfile);*curfile->limit='\0';curfile->pos=curfile->buf;@ @<Adjust |curfile|, if necessary...@>=if (curfile!=NULL) {@+filenode *sentinel=curfile;  for (f=curfile->link;f!=sentinel;f=f->link)    @<Change |curfile| to |f| if |f->curword<curfile->curword|@>;    }@ @<Change |curfile| to |f| if |f->curword<curfile->curword|@>={  *f->endword='\0';  for (u=f->curword,v=curfile->curword;ord[*u]==ord[*v];u++,v++) ;  if (ord[*u]<ord[*v]) curfile=f;  *f->endword=breakchar;}@* Index. Here is a list of the identifiers used by {\tt wordtest},showing the sections in which they appear, underlined at pointsof definition.
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -