⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 treenode.c

📁 机器学习作者tom mitchell的书上代码
💻 C
📖 第 1 页 / 共 4 页
字号:
/* treenode.c - Functions for hierarchical word distributions. *//* Copyright (C) 1997, 1998, 1999 Andrew McCallum   Written by:  Andrew Kachites McCallum <mccallum@cs.cmu.edu>   This file is part of the Bag-Of-Words Library, `libbow'.   This library is free software; you can redistribute it and/or   modify it under the terms of the GNU Library General Public License   as published by the Free Software Foundation, version 2.      This library is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   Library General Public License for more details.   You should have received a copy of the GNU Library General Public   License along with this library; if not, write to the Free Software   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA */#include <bow/libbow.h>#include <argp.h>#include <bow/crossbow.h>#define USE_ACCELERATED_EM 0#define MISC_STAYS_FLAT 1/* Accellerated EM: still gauranteed to converge below 2.0.  1.8 is good */#define EM_ACCELERATION 1.8/* Functions for creating, reading, writing the treenode *//* Create and return a new treenode, adding it as a child of PARENT,   if PARENT is non-NULL. */treenode *bow_treenode_new (treenode *parent, 	      int children_capacity,	      const char *name){  treenode *ret;  int i;  /* To avoid malloc'ing zero bytes */  if (children_capacity == 0)    children_capacity = 1;  ret = bow_malloc (sizeof (treenode));  /* Set relationship with parent. */  ret->parent = parent;  if (parent)    {      ret->ci_in_parent = parent->children_count;      if (parent->children_count >= parent->children_capacity)	{	  parent->children_capacity *= 2;	  parent->children = 	    bow_realloc (parent->children, 			 parent->children_capacity * sizeof (void*));	}      parent->children[parent->children_count++] = ret;      if (name)	{	  /* +1 for the /, +1 for the terminating 0 */	  ret->name = bow_malloc (strlen (parent->name) + strlen (name) + 2);	  sprintf ((char*)ret->name, "%s%s/", parent->name, name);	}      else	{	  ret->name = bow_malloc (strlen (parent->name) + 20);	  sprintf ((char*)ret->name, "%s%d/", parent->name, ret->ci_in_parent);	}      ret->words_capacity = parent->words_capacity;      ret->classes_capacity = parent->classes_capacity;    }  else    {      ret->ci_in_parent = -1;      ret->name = strdup ("/");      ret->words_capacity = bow_num_words ();      ret->classes_capacity = 0;    }  ret->children_count = 0;  ret->children_capacity = children_capacity;  assert (children_capacity);  ret->children = bow_malloc (ret->children_capacity * sizeof (void*));  ret->words = bow_malloc (ret->words_capacity * sizeof (double));  ret->new_words = bow_malloc (ret->words_capacity * sizeof (double));  ret->new_words_normalizer = 0;  for (i = 0; i < ret->words_capacity; i++)    {      ret->words[i] = 0;      ret->new_words[i] = 0;    }  ret->prior = 1.0;  ret->new_prior = 1.0;  if (parent)    ret->depth = parent->depth + 1;  else    ret->depth = 0;  /* Initialize ancestor mixture weights, LAMBDAS, to use exclusively     the local estimate. */  ret->lambdas = bow_malloc ((ret->depth + 2) * sizeof (double));  ret->new_lambdas = bow_malloc ((ret->depth + 2) * sizeof (double));  ret->lambdas[0] = 1.0;  ret->new_lambdas[0] = 1.0;  for (i = 1; i < ret->depth + 2; i++)    {      ret->lambdas[i] = 0;      ret->new_lambdas[i] = 0;    }  /* Initialize the CLASSES distribution later, only if requested. */  if (ret->classes_capacity == 0)    {      ret->classes = NULL;      ret->new_classes = NULL;    }  else    {      ret->classes = bow_malloc (ret->classes_capacity * sizeof (double));      ret->new_classes = bow_malloc (ret->classes_capacity * sizeof (double));      for (i = 0; i < ret->classes_capacity; i++)	{	  ret->classes[i] = 1.0 / ret->classes_capacity;	  ret->new_classes[i] = 0.0;	}    }  /* Initialize the DI_WI_NEW_WORDS later, only if requested. */  ret->di_loo = NULL;  ret->di_wvi_loo = NULL;  ret->new_di_loo = NULL;  ret->new_di_wvi_loo = NULL;  return ret;}/* Free the memory allocate by TN and its children */voidbow_treenode_free (treenode *tn){  int ci;  assert (tn == NULL && tn->ci_in_parent == -1);  for (ci = 0; ci < tn->children_count; ci++)    bow_treenode_free (tn->children[ci]);  if (tn->children) bow_free (tn->children);  if (tn->words) bow_free (tn->words);  if (tn->new_words) bow_free (tn->new_words);  if (tn->lambdas) bow_free (tn->lambdas);  if (tn->new_lambdas) bow_free (tn->new_lambdas);  if (tn->name) bow_free ((char*)tn->name);  bow_free (tn);}/* Create and return a new treenode with the proper settings to be the   root treenode */treenode *bow_treenode_new_root (int children_count){  return bow_treenode_new (NULL, children_count, NULL);}/* Reallocate memory for the WORDS and NEW_WORDS arrays, big enough    for the vocabulary of size bow_num_words().  This is useful when the    tree has been created before all the documents have been indexed. */voidbow_treenode_realloc_words_all (treenode *root){  int i;  if (bow_num_words () > root->words_capacity)    {      root->words_capacity = bow_num_words ();      root->words = 	bow_realloc (root->words, root->words_capacity * sizeof (double));      root->new_words = 	bow_realloc (root->words, root->words_capacity * sizeof (double));      root->new_words_normalizer = 0;      for (i = 0; i < root->words_capacity; i++)	{	  root->words[i] = 0;	  root->new_words[i] = 0;	}    }  for (i = 0; i < root->children_count; i++)    bow_treenode_realloc_words_all (root->children[i]);}/* Add to parent a CHILD that was previously created with a NULL parent. */voidbow_treenode_add_child (treenode *parent, treenode *child){  assert (parent->children_count < parent->children_capacity);  assert (child->ci_in_parent == -1);  child->parent = parent;  child->ci_in_parent = parent->children_count;  parent->children[parent->children_count++] = child;}/* Detach CHILD from PARENT, shifting remaining children, and updating   the remaining children's CI_IN_PARENT. */voidbow_treenode_remove_child (treenode *parent, treenode *child){  bow_error ("%s: Not yet implemented", __PRETTY_FUNCTION__);}/* To this node and all its children, add a child named "Misc" if not   there already. */voidbow_treenode_add_misc_child_all (treenode *root){  int ci;  /* If ROOT is a leaf, just return immediately */  if (root->children_count == 0)    return;  /* Search for a pre-existing "Misc" child */  for (ci = 0; ci < root->children_count; ci++)    if (strstr (root->children[ci]->name, "/Misc/"))      goto do_children;  /* Add a "Misc" child */  bow_treenode_new (root, 0, "Misc");  /* Recursively handle children */ do_children:  for (ci = 0; ci < root->children_count; ci++)    bow_treenode_add_misc_child_all (root->children[ci]);}/* Return the "next" treenode in a traversal of the tree.  CONTEXT   should be initialized to the root of the tree, otherwise strange   results will ensue: nodes on the path from the initial CONTEXT node   to the root will be skipped by the iteration.  When the traversal   is finished, NULL will be returned. */treenode *bow_treenode_iterate_all (treenode **context){  treenode *ret;  if (*context == NULL)    return NULL;  /* Save the context as this call's return value */  ret = *context;  /* Update the context for the next call. */  if ((*context)->children_count)    *context = (*context)->children[0];  else    {      while ((*context)->parent	     && ((*context)->ci_in_parent		 == (*context)->parent->children_count-1))	{	  *context = (*context)->parent;	}      if ((*context)->parent)	*context = (*context)->parent->children[(*context)->ci_in_parent+1];      else	*context = NULL;    }  return ret;}/* Same as above, but only return the leaf nodes. */treenode *bow_treenode_iterate_leaves (treenode **context){  treenode *ret;  while ((ret = bow_treenode_iterate_all (context))	 && ret->children_count != 0)    ;  return ret;}treenode *bow_treenode_iterate_all_under_node (treenode **context, treenode *node){  treenode *ret;  if (*context == NULL)    return NULL;  /* Save the context as this call's return value */  ret = *context;  /* Update the context for the next call. */  if ((*context)->children_count)    *context = (*context)->children[0];  else    {      while ((*context)->parent != node	     && ((*context)->ci_in_parent		 == (*context)->parent->children_count-1))	{	  *context = (*context)->parent;	}      if ((*context)->parent != node)	*context = (*context)->parent->children[(*context)->ci_in_parent+1];      else	*context = NULL;    }  return ret;}/* Same as above, but only return the leaf nodes. */treenode *bow_treenode_iterate_leaves_under_node (treenode **context, treenode *node){  treenode *ret;  while ((ret = bow_treenode_iterate_all_under_node (context, node))	 && ret->children_count != 0)    ;  return ret;}/* Return the deepest descendant with a ->NAME that is contained in NAME */treenode *bow_treenode_descendant_matching_name (treenode *root, const char *name){  int ci;  treenode *tr;#if WHIZBANG  char buf[256];    strcpy (buf, "./data");  strcat (buf, root->name);  if (strstr (name, buf) != name)#else  if (!strstr (name, root->name))    return NULL;#endif  for (ci = 0; ci < root->children_count; ci++)    {      if ((tr = bow_treenode_descendant_matching_name	   (root->children[ci], name)))	return tr;    }#if WHIZBANG  assert (root->depth == 2);#endif  return root;}/* Archiving */#define BOW_TREENODE_HEADER_STRING "treenode\n"/* Write a treenode (and all its children) to FP. */voidbow_treenode_write (treenode *tn, FILE *fp){  int i;  /* Write a tag that will later help verify we are reading correctly. */  bow_fwrite_string (BOW_TREENODE_HEADER_STRING, fp);  /* If TN is NULL, write a 0 and return */  if (tn)    bow_fwrite_int (1, fp);  else    {      bow_fwrite_int (0, fp);      return;    }  /* Write the name */  bow_fwrite_string (tn->name, fp);  /* Write the multinomial */  bow_fwrite_int (tn->words_capacity, fp);  bow_fwrite_double (tn->new_words_normalizer, fp);  for (i = 0; i < tn->words_capacity; i++)    bow_fwrite_double (tn->words[i], fp);  /* Write the prior */  bow_fwrite_double (tn->prior, fp);  bow_fwrite_double (tn->new_prior, fp);  /* Write the lambda mixture weights */  bow_fwrite_int (tn->depth, fp);  for (i = 0; i < tn->depth + 2; i++)    {      bow_fwrite_double (tn->lambdas[i], fp);      bow_fwrite_double (tn->new_lambdas[i], fp);    }  /* Write the class distribution */  bow_fwrite_int (tn->classes_capacity, fp);  if (tn->classes_capacity)    {      for (i = 0; i < tn->classes_capacity; i++)	bow_fwrite_double (tn->classes[i], fp);    }  /* Write the children treenodes */  bow_fwrite_int (tn->children_count, fp);  bow_fwrite_int (tn->children_capacity, fp);  bow_fwrite_int (tn->ci_in_parent, fp);  for (i = 0; i < tn->children_count; i++)    bow_treenode_write (tn->children[i], fp);}/* Read and return a new treenode (and all its children) from FP. */treenode *bow_treenode_new_from_fp (FILE *fp){  char *header;  treenode *tn;  int i;  /* Verify that we are starting read from the correct place in the FP */  bow_fread_string (&header, fp);  if (strcmp (header, BOW_TREENODE_HEADER_STRING) != 0)    bow_error ("Trying to read a treenode from bad FILE* location");  bow_free (header);  /* If a NULL treenode was written, return NULL. */  bow_fread_int (&i, fp);  if (i == 0)    return NULL;  tn = bow_malloc (sizeof (treenode));  tn->parent = NULL;  /* Read the name */  bow_fread_string ((char**)&(tn->name), fp);  /* Read the multinomial */  bow_fread_int (&(tn->words_capacity), fp);  tn->words = bow_malloc (tn->words_capacity * sizeof (double));  tn->new_words = bow_malloc (tn->words_capacity * sizeof (double));  bow_fread_double (&(tn->new_words_normalizer), fp);  for (i = 0; i < tn->words_capacity; i++)    {      bow_fread_double (&(tn->words[i]), fp);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -