📄 gb_words.w
字号:
@d nodes_per_block 111@<Type...@>=typedef struct node_struct { long key; /* the sort key (weight plus $2^{30}$) */ struct node_struct *link; /* links the nodes together */ char wd[5]; /* five-letter word (which typically consumes eight bytes, too bad) */} node;@ @<Local...@>=node *next_node; /* the next node available for allocation */node *bad_node; /* if |next_node=bad_node|, the node isn't really there */node *stack_ptr; /* the most recently created node */node *cur_node; /* current node being created or examined */@ @<Private v...@>=static Area node_blocks; /* the memory area for blocks of nodes */@ @<Input the qualifying words...@>=next_node=bad_node=stack_ptr=NULL;if (gb_open("words.dat")!=0) panic(early_data_fault); /* couldn't open |"words.dat"| using GraphBase conventions; |io_errors| tells why */do @<Read one word, and put it on the stack if it qualifies@>@; while (!gb_eof());if (gb_close()!=0) panic(late_data_fault); /* something's wrong with |"words.dat"|; see |io_errors| */@ @<Read one...@>={@+register long j; /* position in |word| */ for (j=0; j<5; j++) word[j]=gb_char(); @<Compute the weight |wt|@>; if (wt>=wt_threshold) { /* it qualifies */ @<Install |word| and |wt| in a new node@>; nn++; } gb_newline();}@ @d copy5(y,x) {@+ *(y)=*(x);@+ *((y)+1)=*((x)+1);@+ *((y)+2)=*((x)+2); *((y)+3)=*((x)+3);@+ *((y)+4)=*((x)+4);@+ }@<Install...@>=if (next_node==bad_node) { cur_node=gb_typed_alloc(nodes_per_block,node,node_blocks); if (cur_node==NULL) panic(no_room+1); /* out of memory already */ next_node=cur_node+1; bad_node=cur_node+nodes_per_block;}@+else cur_node=next_node++;cur_node->key=wt+0x40000000;cur_node->link=stack_ptr;copy5(cur_node->wd,word);stack_ptr=cur_node;@ Recall that |gb_number()| returns 0, without giving an error, if nodigit is present in the current position of the file being read. Thisimplies that the \.{words.dat} file need not include zero countsexplicitly. Furthermore, we can arrange things so that trailing zerocounts are unnecessary; commas can be omitted if all countsfollowing them on the current line are zero.@<Compute the weight...@>={@+register long *p,*q; /* pointers to $C_j$ and $w_j$ */ register long c; /* current count */ switch (gb_char()) { case '*': wt=wt_vector[0];@+break; /* `common' word */ case '+': wt=wt_vector[1];@+break; /* `advanced' word */ case ' ': case'\n': wt=0;@+break; /* `unusual' word */ default: panic(syntax_error); /* unknown type of word */ } p=&max_c[0]; q=&wt_vector[2]; do@+{ if (p==&max_c[7]) panic(syntax_error+1); /* too many counts */ c=gb_number(10); if (c>*p++) panic(syntax_error+2); /* count too large */ wt += c * *q++; }@+while (gb_char()==',');}@* The output phase. Once the input phase has examined all of\.{words.dat}, we are left with a stack of |nn| nodes containing thequalifying words, starting at |stack_ptr|.The next step is to call |gb_linksort|, which takes the qualifying wordsand distributes them into the 128 lists |gb_sorted[j]|, for |0<=j<128|.We can then access the words in order of decreasing weight by reading throughthese lists, starting with |gb_sorted[127]| and ending with |gb_sorted[0]|.(See the documentation of |gb_linksort| in the {\sc GB\_\,SORT} module.)The output phase therefore has the following general outline:@<Sort and output...@>=gb_linksort(stack_ptr);@<Allocate storage for the new graph; adjust |n| if it is zero or too large@>;if (gb_trouble_code==0 && n) { register long j; /* runs through sorted lists */ register node *p; /* the current node being output */ nn=n; for (j=127; j>=0; j--) for (p=(node*)gb_sorted[j]; p; p=p->link) { @<Add the word |p->wd| to the graph@>; if (--nn==0) goto done; }}done:gb_free(node_blocks);@ The only slightly unusual data structure needed is a set of five hash tables,one for each of the strings of four letters obtained by suppressinga single letter of a five-letter word. For example, a word like `\.{words}'will lead to entries for `\.{\ ords}', `\.{w\ rds}, `\.{wo\ ds}', `\.{wor\ s}',and `\.{word\ }', one in each of the hash tables.@d hash_prime 6997 /* a prime number larger than the total number of words */@<Type...@>=typedef Vertex *hash_table[hash_prime];@ @<Local...@>=Vertex *cur_vertex; /* the current vertex being created or examined */char *next_string; /* where we'll store the next five-letter word */@ @<Private v...@>=static hash_table *htab; /* five dynamically allocated hash tables */@ The weight of each word will be stored in the utility field |u.I| of its|Vertex| record. The position in which adjacent words differ will bestored in utility field |a.I| of the |Arc| records between them.@d weight u.I /* weighted frequencies */@d loc a.I /* index of difference (0, 1, 2, 3, or 4) */@(gb_words.h@>=#define weight @[u.I@] /* repeat the definitions in the header file */#define loc @[a.I@]@ @<Allocate storage for the new graph...@>=if (n==0 || nn<n) n=nn;new_graph=gb_new_graph(n);if (new_graph==NULL) panic(no_room); /* out of memory before we're even started */if (wt_vector==default_wt_vector) sprintf(new_graph->id,"words(%lu,0,%ld,%ld)",n,wt_threshold,seed);else sprintf(new_graph->id, "words(%lu,{%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld},%ld,%ld)", n,wt_vector[0],wt_vector[1],wt_vector[2],wt_vector[3],wt_vector[4], wt_vector[5],wt_vector[6],wt_vector[7],wt_vector[8],wt_threshold,seed);strcpy(new_graph->util_types,"IZZZZZIZZZZZZZ");cur_vertex=new_graph->vertices;next_string=gb_typed_alloc(6*n,char,new_graph->data);htab=gb_typed_alloc(5,hash_table,new_graph->aux_data);@ @<Add the word...@>={@+register char *q; /* the new word */ q=cur_vertex->name=next_string; next_string+=6; copy5(q,p->wd); cur_vertex->weight=p->key-0x40000000; @<Add edges for all previous words |r| that nearly match |q|@>; cur_vertex++;}@ The length of each edge in a |words| graph is set to~1; thecalling routine can change it later if desired.@d mtch(i) (*(q+i)==*(r+i))@d match(a,b,c,d) (mtch(a)&&mtch(b)&&mtch(c)&&mtch(d))@d store_loc_of_diff(k) cur_vertex->arcs->loc=(cur_vertex->arcs-1)->loc=k@d ch(q) ((long)*(q))@d hdown(k) h==htab[k]? h=htab[k+1]-1: h--@<Add edges for all previous words |r| that nearly match |q|@>={@+register char *r; /* previous word possibly adjacent to |q| */ register Vertex **h; /* hash address for linear probing */ register long raw_hash; /* five-letter hash code before remaindering */ raw_hash=(((((((ch(q)<<5)+ch(q+1))<<5)+ch(q+2))<<5)+ch(q+3))<<5)+ch(q+4); for (h=htab[0]+(raw_hash-(ch(q)<<20)) % hash_prime; *h; hdown(0)) { r=(*h)->name; if (match(1,2,3,4)) gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(0); } *h=cur_vertex; for (h=htab[1]+(raw_hash-(ch(q+1)<<15)) % hash_prime; *h; hdown(1)) { r=(*h)->name; if (match(0,2,3,4)) gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(1); } *h=cur_vertex; for (h=htab[2]+(raw_hash-(ch(q+2)<<10)) % hash_prime; *h; hdown(2)) { r=(*h)->name; if (match(0,1,3,4)) gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(2); } *h=cur_vertex; for (h=htab[3]+(raw_hash-(ch(q+3)<<5)) % hash_prime; *h; hdown(3)) { r=(*h)->name; if (match(0,1,2,4)) gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(3); } *h=cur_vertex; for (h=htab[4]+(raw_hash-ch(q+4)) % hash_prime; *h; hdown(4)) { r=(*h)->name; if (match(0,1,2,3)) gb_new_edge(cur_vertex,*h,1L), store_loc_of_diff(4); } *h=cur_vertex;}@* Finding a word. After |words| has created a graph |g|, the user canremove the hash tables by calling |gb_free(g->aux_data)|. But if thehash tables have not been removed, another procedure can be used tofind vertices that match or nearly match a given word.The subroutine call |find_word(q,f)| will return a pointer to a vertexthat matches a given five-letter word~|q|, if that word is in the graph;otherwise, it returns |NULL| (i.e., \.{NULL}), after calling |f(v)| foreach vertex~|v| whose word matches |q| in all but one letter position.@p Vertex *find_word(q,f) char *q; void @[@] (*f)(); /* |*f| should take one argument, of type |Vertex *|, or |f| should be |NULL| */{@+register char *r; /* previous word possibly adjacent to |q| */ register Vertex **h; /* hash address for linear probing */ register long raw_hash; /* five-letter hash code before remaindering */ raw_hash=(((((((ch(q)<<5)+ch(q+1))<<5)+ch(q+2))<<5)+ch(q+3))<<5)+ch(q+4); for (h=htab[0]+(raw_hash-(ch(q)<<20)) % hash_prime; *h; hdown(0)) { r=(*h)->name; if (mtch(0) && match(1,2,3,4)) return *h; } @<Invoke |f| on every vertex that is adjacent to word~|q|@>; return NULL;} @ @<Invoke |f| on every vertex that is adjacent to word~|q|@>=if (f) { for (h=htab[0]+(raw_hash-(ch(q)<<20)) % hash_prime; *h; hdown(0)) { r=(*h)->name; if (match(1,2,3,4)) (*f)(*h); } for (h=htab[1]+(raw_hash-(ch(q+1)<<15)) % hash_prime; *h; hdown(1)) { r=(*h)->name; if (match(0,2,3,4)) (*f)(*h); } for (h=htab[2]+(raw_hash-(ch(q+2)<<10)) % hash_prime; *h; hdown(2)) { r=(*h)->name; if (match(0,1,3,4)) (*f)(*h); } for (h=htab[3]+(raw_hash-(ch(q+3)<<5)) % hash_prime; *h; hdown(3)) { r=(*h)->name; if (match(0,1,2,4)) (*f)(*h); } for (h=htab[4]+(raw_hash-ch(q+4)) % hash_prime; *h; hdown(4)) { r=(*h)->name; if (match(0,1,2,3)) (*f)(*h); }}@* Index. Here is a list that shows where the identifiers of this program aredefined and used.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -