📄 gb_words.w

📁 模拟器提供了一个简单易用的平台
💻 W
📖 第 1 页 / 共 2 页
字号:
上一页 12
@d nodes_per_block 111@<Type...@>=typedef struct node_struct {  long key; /* the sort key (weight plus $2^{30}$) */  struct node_struct *link; /* links the nodes together */  char wd[5]; /* five-letter word                   (which typically consumes eight bytes, too bad) */} node;@ @<Local...@>=node *next_node; /* the next node available for allocation */node *bad_node; /* if |next_node=bad_node|, the node isn't really there */node *stack_ptr; /* the most recently created node */node *cur_node; /* current node being created or examined */@ @<Private v...@>=static Area node_blocks; /* the memory area for blocks of nodes */@ @<Input the qualifying words...@>=next_node=bad_node=stack_ptr=NULL;if (gb_open("words.dat")!=0)  panic(early_data_fault);   /* couldn't open |"words.dat"| using GraphBase conventions;                |io_errors| tells why */do @<Read one word, and put it on the stack if it qualifies@>@;  while (!gb_eof());if (gb_close()!=0)  panic(late_data_fault);    /* something's wrong with |"words.dat"|; see |io_errors| */@ @<Read one...@>={@+register long j; /* position in |word| */  for (j=0; j<5; j++) word[j]=gb_char();  @<Compute the weight |wt|@>;  if (wt>=wt_threshold) { /* it qualifies */    @<Install |word| and |wt| in a new node@>;    nn++;  }  gb_newline();}@ @d copy5(y,x) {@+    *(y)=*(x);@+    *((y)+1)=*((x)+1);@+    *((y)+2)=*((x)+2);    *((y)+3)=*((x)+3);@+    *((y)+4)=*((x)+4);@+  }@<Install...@>=if (next_node==bad_node) {  cur_node=gb_typed_alloc(nodes_per_block,node,node_blocks);  if (cur_node==NULL)    panic(no_room+1); /* out of memory already */  next_node=cur_node+1;  bad_node=cur_node+nodes_per_block;}@+else cur_node=next_node++;cur_node->key=wt+0x40000000;cur_node->link=stack_ptr;copy5(cur_node->wd,word);stack_ptr=cur_node;@ Recall that |gb_number()| returns 0, without giving an error, if nodigit is present in the current position of the file being read. Thisimplies that the \.{words.dat} file need not include zero countsexplicitly. Furthermore, we can arrange things so that trailing zerocounts are unnecessary; commas can be omitted if all countsfollowing them on the current line are zero.@<Compute the weight...@>={@+register long *p,*q; /* pointers to $C_j$ and $w_j$ */  register long c; /* current count */  switch (gb_char()) {   case '*': wt=wt_vector[0];@+break; /* `common' word */   case '+': wt=wt_vector[1];@+break; /* `advanced' word */   case ' ': case'\n': wt=0;@+break; /* `unusual' word */   default: panic(syntax_error); /* unknown type of word */  }  p=&max_c[0]; q=&wt_vector[2];  do@+{    if (p==&max_c[7])      panic(syntax_error+1); /* too many counts */    c=gb_number(10);    if (c>*p++)      panic(syntax_error+2); /* count too large */    wt += c * *q++;  }@+while (gb_char()==',');}@* The output phase. Once the input phase has examined all of\.{words.dat}, we are left with a stack of |nn| nodes containing thequalifying words, starting at |stack_ptr|.The next step is to call |gb_linksort|, which takes the qualifying wordsand distributes them into the 128 lists |gb_sorted[j]|, for |0<=j<128|.We can then access the words in order of decreasing weight by reading throughthese lists, starting with |gb_sorted[127]| and ending with |gb_sorted[0]|.(See the documentation of |gb_linksort| in the {\sc GB\_\,SORT} module.)The output phase therefore has the following general outline:@<Sort and output...@>=gb_linksort(stack_ptr);@<Allocate storage for the new graph; adjust |n| if it is zero or too large@>;if (gb_trouble_code==0 && n) {  register long j; /* runs through sorted lists */  register node *p; /* the current node being output */  nn=n;  for (j=127; j>=0; j--)    for (p=(node*)gb_sorted[j]; p; p=p->link) {      @<Add the word |p->wd| to the graph@>;      if (--nn==0) goto done;    }}done:gb_free(node_blocks);@ The only slightly unusual data structure needed is a set of five hash tables,one for each of the strings of four letters obtained by suppressinga single letter of a five-letter word. For example, a word like `\.{words}'will lead to entries for `\.{\ ords}', `\.{w\ rds}, `\.{wo\ ds}', `\.{wor\ s}',and `\.{word\ }', one in each of the hash tables.@d hash_prime 6997 /* a prime number larger than the total number of words */@<Type...@>=typedef Vertex *hash_table[hash_prime];@ @<Local...@>=Vertex *cur_vertex; /* the current vertex being created or examined */char *next_string; /* where we'll store the next five-letter word */@ @<Private v...@>=static hash_table *htab; /* five dynamically allocated hash tables */@ The weight of each word will be stored in the utility field |u.I| of its|Vertex| record. The position in which adjacent words differ will bestored in utility field |a.I| of the |Arc| records between them.@d weight u.I /* weighted frequencies */@d loc a.I /* index of difference (0, 1, 2, 3, or 4) */@(gb_words.h@>=#define weight @[u.I@] /* repeat the definitions in the header file */#define loc @[a.I@]@ @<Allocate storage for the new graph...@>=if (n==0 || nn<n)  n=nn;new_graph=gb_new_graph(n);if (new_graph==NULL)  panic(no_room); /* out of memory before we're even started */if (wt_vector==default_wt_vector)  sprintf(new_graph->id,"words(%lu,0,%ld,%ld)",n,wt_threshold,seed);else sprintf(new_graph->id,     "words(%lu,{%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld},%ld,%ld)",     n,wt_vector[0],wt_vector[1],wt_vector[2],wt_vector[3],wt_vector[4],     wt_vector[5],wt_vector[6],wt_vector[7],wt_vector[8],wt_threshold,seed);strcpy(new_graph->util_types,"IZZZZZIZZZZZZZ");cur_vertex=new_graph->vertices;next_string=gb_typed_alloc(6*n,char,new_graph->data);htab=gb_typed_alloc(5,hash_table,new_graph->aux_data);@ @<Add the word...@>={@+register char *q; /* the new word */  q=cur_vertex->name=next_string;  next_string+=6;  copy5(q,p->wd);  cur_vertex->weight=p->key-0x40000000;  @<Add edges for all previous words |r| that nearly match |q|@>;  cur_vertex++;}@ The length of each edge in a |words| graph is set to~1; thecalling routine can change it later if desired.@d mtch(i) (*(q+i)==*(r+i))@d match(a,b,c,d) (mtch(a)&&mtch(b)&&mtch(c)&&mtch(d))@d store_loc_of_diff(k) cur_vertex->arcs->loc=(cur_vertex->arcs-1)->loc=k@d ch(q) ((long)*(q))@d hdown(k) h==htab[k]? h=htab[k+1]-1: h--@<Add edges for all previous words |r| that nearly match |q|@>={@+register char *r; /* previous word possibly adjacent to |q| */  register Vertex **h; /* hash address for linear probing */  register long raw_hash; /* five-letter hash code before remaindering */  raw_hash=(((((((ch(q)<<5)+ch(q+1))<<5)+ch(q+2))<<5)+ch(q+3))<<5)+ch(q+4);  for (h=htab[0]+(raw_hash-(ch(q)<<20)) % hash_prime; *h; hdown(0)) {    r=(*h)->name;    if (match(1,2,3,4))      gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(0);  }  *h=cur_vertex;  for (h=htab[1]+(raw_hash-(ch(q+1)<<15)) % hash_prime; *h; hdown(1)) {    r=(*h)->name;    if (match(0,2,3,4))      gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(1);  }  *h=cur_vertex;  for (h=htab[2]+(raw_hash-(ch(q+2)<<10)) % hash_prime; *h; hdown(2)) {    r=(*h)->name;    if (match(0,1,3,4))      gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(2);  }  *h=cur_vertex;  for (h=htab[3]+(raw_hash-(ch(q+3)<<5)) % hash_prime; *h; hdown(3)) {    r=(*h)->name;    if (match(0,1,2,4))      gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(3);  }  *h=cur_vertex;  for (h=htab[4]+(raw_hash-ch(q+4)) % hash_prime; *h; hdown(4)) {    r=(*h)->name;    if (match(0,1,2,3))      gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(4);  }  *h=cur_vertex;}@* Finding a word. After |words| has created a graph |g|, the user canremove the hash tables by calling |gb_free(g->aux_data)|. But if thehash tables have not been removed, another procedure can be used tofind vertices that match or nearly match a given word.The subroutine call |find_word(q,f)| will return a pointer to a vertexthat matches a given five-letter word~|q|, if that word is in the graph;otherwise, it returns |NULL| (i.e., \.{NULL}), after calling |f(v)| foreach vertex~|v| whose word matches |q| in all but one letter position.@p Vertex *find_word(q,f)  char *q;  void @[@] (*f)(); /* |*f| should take one argument, of type |Vertex *|,                        or |f| should be |NULL| */{@+register char *r; /* previous word possibly adjacent to |q| */  register Vertex **h; /* hash address for linear probing */  register long raw_hash; /* five-letter hash code before remaindering */  raw_hash=(((((((ch(q)<<5)+ch(q+1))<<5)+ch(q+2))<<5)+ch(q+3))<<5)+ch(q+4);  for (h=htab[0]+(raw_hash-(ch(q)<<20)) % hash_prime; *h; hdown(0)) {    r=(*h)->name;    if (mtch(0) && match(1,2,3,4))      return *h;  }  @<Invoke |f| on every vertex that is adjacent to word~|q|@>;  return NULL;} @ @<Invoke |f| on every vertex that is adjacent to word~|q|@>=if (f) {  for (h=htab[0]+(raw_hash-(ch(q)<<20)) % hash_prime; *h; hdown(0)) {    r=(*h)->name;    if (match(1,2,3,4))      (*f)(*h);  }  for (h=htab[1]+(raw_hash-(ch(q+1)<<15)) % hash_prime; *h; hdown(1)) {    r=(*h)->name;    if (match(0,2,3,4))      (*f)(*h);  }  for (h=htab[2]+(raw_hash-(ch(q+2)<<10)) % hash_prime; *h; hdown(2)) {    r=(*h)->name;    if (match(0,1,3,4))      (*f)(*h);  }  for (h=htab[3]+(raw_hash-(ch(q+3)<<5)) % hash_prime; *h; hdown(3)) {    r=(*h)->name;    if (match(0,1,2,4))      (*f)(*h);  }  for (h=htab[4]+(raw_hash-ch(q+4)) % hash_prime; *h; hdown(4)) {    r=(*h)->name;    if (match(0,1,2,3))      (*f)(*h);  }}@* Index. Here is a list that shows where the identifiers of this program aredefined and used.
上一页 12
💿 文件大小 1340 K
👤 上传用户 zyhunicom
📂 所属分类网络
🏷️ 相关标签

#模拟
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -