📄 count-local-trees.c
字号:
/* count-local-trees.c
*
* Mark Johnson, 21st May 1997
*
* Reads the f2-21.txt treebank format trees from stdin, and counts
* the local trees.
*
* readlabel() ignores all but the first component of node labels
*
* We ignore the terminals, and only count the preterminals and nonterminals
*/
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "local-trees.h" /* local tree count format */
#include "mmm.h" /* memory debugger */
#include "hash-string.h" /* hash tables and string-index tables */
#include "vindex.h"
#include "tree.h"
/* When nonzero, main() passes each tree through annotate_with_parent()
 * before its local trees are counted. */
#define PARENT_ANNOTATION 1
void count_local_trees(const tree tree, vihashl localtree_ht);
/* adds local tree counts from local trees in tree to localtree_ht;
 * a NULL tree or a node with no subtrees adds nothing */
void write_local_trees(FILE *fh, const vihashl localtree_ht, si_t si);
/* writes the local tree hash table to fh, one line per entry: the count,
 * then the labels of the local tree looked up as strings in si */
/* count_local_trees() adds 1 to the count in localtree_ht of every local
 * tree found in t.
 *
 * A local tree is passed to vihashl_inc() as a vindex whose element 0 is
 * the label of the parent node and whose remaining elements are the labels
 * of its subtrees in sibling order.  A NULL t, or a node with no subtrees,
 * contributes nothing.  Each subtree is processed recursively before the
 * parent's own local tree is added to the table.
 *
 * The labels are collected in a fixed buffer of MAXRHS entries.  The bound
 * is enforced with an explicit check rather than assert(), so that a local
 * tree with too many labels cannot overrun the buffer even when the file
 * is compiled with NDEBUG; in that case a message is written to stderr and
 * the program aborts.
 */
void count_local_trees(const tree t, vihashl localtree_ht)
{
  si_index e[MAXRHS];
  struct vindex vi = {0, e};
  tree p;

  if (!t || !t->subtrees) return;

  vi.e[vi.n++] = t->label;              /* parent label goes first */

  for (p = t->subtrees; p; p = p->sibling) {
    if (vi.n >= MAXRHS) {
      /* one more label would write past the end of e[] */
      fprintf(stderr,
              "count_local_trees: local tree has more than %d labels\n",
              (int) MAXRHS);
      abort();
    }
    vi.e[vi.n++] = p->label;
    count_local_trees(p, localtree_ht);
  }

  vihashl_inc(localtree_ht, &vi, 1);    /* save this list */
}
/* write_local_trees() prints every entry of localtree_ht to fp, one entry
 * per line.
 *
 * Each line consists of the entry's count, a tab, the string for the first
 * label of the key, a space, the REWRITES string (a macro not defined in
 * this file), and then the string for each remaining label preceded by a
 * space.  Labels are converted to strings with si_index_string() on si.
 * Entries are written in the order the hash table iterator yields them.
 */
void write_local_trees(FILE *fp,const vihashl localtree_ht, si_t si)
{
  vihashlit it;

  it = vihashlit_init(localtree_ht);
  while (vihashlit_ok(it)) {
    vindex rule = (vindex) it.key;
    long times_seen;
    char *label;
    size_t k;

    /* every stored key holds at least the parent label and fits in MAXRHS */
    assert(rule->n > 0);
    assert(rule->n <= MAXRHS);
    times_seen = it.value;

    /* count and left-hand side */
    label = si_index_string(si, rule->e[0]);
    assert(label);
    fprintf(fp,"%ld\t%s " REWRITES, times_seen, label);

    /* remaining labels, each preceded by a single space */
    for (k = 1; k < rule->n; ++k) {
      label = si_index_string(si, rule->e[k]);
      assert(label);
      fprintf(fp," %s", label);
    }
    fprintf(fp,"\n");

    it = vihashlit_next(it);
  }
}
/* main() reads trees from stdin with readtree_root() until it returns a
 * null tree, accumulates local tree counts in a hash table, and writes the
 * table to stdout with write_local_trees().
 *
 * Usage:  count-local-trees [maxsentlen] < trees > local-tree-counts
 *
 * maxsentlen is optional.  When it is absent or 0 every tree is counted;
 * otherwise a tree is counted only if the vindex returned by tree_terms()
 * has at most maxsentlen entries (presumably the sentence length).  The
 * argument must be a non-negative decimal integer that fits in an int;
 * anything else is reported on stderr and the program exits with
 * EXIT_FAILURE.
 *
 * Every tree is first passed through collapse_identical_unary(), and, when
 * PARENT_ANNOTATION is nonzero, counted trees are also passed through
 * annotate_with_parent().  Each intermediate tree is freed as soon as its
 * successor exists, and the final assert checks that the memory debugger
 * reports no blocks still allocated.
 */
int
main(int argc, char **argv)
{
  /* argv[0] may be missing; never hand a null pointer to %s */
  const char *progname = (argc > 0 && argv[0]) ? argv[0] : "count-local-trees";
  tree t, p;
  si_t si;
  FILE *fp = stdin;
  vihashl localtree_ht;
  vindex terms;
  struct vindex terms0;
  int maxsentlen = 0;

  if ((argc < 1) || (argc > 2)) {
    fprintf(stderr, "%s [maxsentlen] < trees > local-tree-counts\n", progname);
    exit(EXIT_FAILURE);
  }

  if (argc == 2) {
    char *end;
    long limit;

    /* strtol, unlike atol, lets us detect garbage and out-of-range input */
    errno = 0;
    limit = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        limit < 0 || limit > INT_MAX) {
      fprintf(stderr, "%s: invalid maxsentlen '%s'\n", progname, argv[1]);
      exit(EXIT_FAILURE);
    }
    maxsentlen = (int) limit;
  }

  /* create the tables only once the arguments are known to be valid */
  si = make_si(100);
  localtree_ht = make_vihashl(1000);

  /* read the trees and save the local tree counts */
  while ((t = readtree_root(fp, si))) {
    p = collapse_identical_unary(t); free_tree(t);
    terms0 = tree_terms(p);
    terms = &terms0;

    if (!maxsentlen || (int) terms->n <= maxsentlen) {
      if (PARENT_ANNOTATION) {
        t = annotate_with_parent(p, si); free_tree(p);
        p = t;                  /* p always names the tree still to be freed */
      }
      count_local_trees(p, localtree_ht);
    }

    free_tree(p);
    FREE(terms->e);             /* free the terms */
  }

  write_local_trees(stdout, localtree_ht, si);

  si_free(si);
  free_vihashl(localtree_ht);
  assert(mmm_blocks_allocated == 0);
  return EXIT_SUCCESS;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -