      tn->new_words[i] = 0;
    }

  /* Read the prior */
  bow_fread_double (&(tn->prior), fp);
  bow_fread_double (&(tn->new_prior), fp);

  /* Read the lambda mixture weights */
  bow_fread_int (&(tn->depth), fp);
  tn->lambdas = bow_malloc ((tn->depth + 2) * sizeof (double));
  tn->new_lambdas = bow_malloc ((tn->depth + 2) * sizeof (double));
  for (i = 0; i < tn->depth + 2; i++)
    {
      bow_fread_double (&(tn->lambdas[i]), fp);
      bow_fread_double (&(tn->new_lambdas[i]), fp);
    }

  /* Read in the class distribution */
  bow_fread_int (&(tn->classes_capacity), fp);
  if (tn->classes_capacity)
    {
      tn->classes = bow_malloc (tn->classes_capacity * sizeof (double));
      tn->new_classes = bow_malloc (tn->classes_capacity * sizeof (double));
      for (i = 0; i < tn->classes_capacity; i++)
        {
          bow_fread_double (&(tn->classes[i]), fp);
          tn->new_classes[i] = 0;
        }
    }
  else
    tn->classes = tn->new_classes = NULL;

  /* Read the children treenodes */
  bow_fread_int (&(tn->children_count), fp);
  bow_fread_int (&(tn->children_capacity), fp);
  tn->children = bow_malloc (tn->children_capacity * sizeof (void*));
  bow_fread_int (&(tn->ci_in_parent), fp);
  for (i = 0; i < tn->children_count; i++)
    {
      tn->children[i] = bow_treenode_new_from_fp (fp);
      tn->children[i]->parent = tn;
      assert (tn->children[i]->ci_in_parent == i);
    }

  /* Initialize the DI_WI_NEW_WORDS later, only if requested. */
  tn->di_loo = NULL;
  tn->di_wvi_loo = NULL;
  tn->new_di_loo = NULL;
  tn->new_di_wvi_loo = NULL;

  //bow_verbosify (bow_progress, "Read treenode %s\n", tn->name);
  return tn;
}

/* Set all of TN's ancestor mixture weights, LAMBDAS, to equal values. */
void
bow_treenode_set_lambdas_uniform (treenode *tn)
{
  int i;
  double lambda = 1.0 / (tn->depth + 2);

  for (i = 0; i < tn->depth + 2; i++)
    tn->lambdas[i] = lambda;
}

/* Same as above, but for all leaves in the tree. */
void
bow_treenode_set_lambdas_uniform_all (treenode *tn)
{
  treenode *iterator, *leaf;

  assert (tn->parent == NULL);
  for (iterator = tn; (leaf = bow_treenode_iterate_leaves (&iterator)); )
    bow_treenode_set_lambdas_uniform (leaf);
}

/* Set TN's mixture weights, LAMBDAS, to use only the estimates. */
void
bow_treenode_set_lambdas_leaf_only (treenode *tn)
{
  int i;

  tn->lambdas[0] = 1;
  for (i = 1; i < tn->depth + 2; i++)
    tn->lambdas[i] = 0;
}

/* Same as above, but for all leaves in the tree. */
void
bow_treenode_set_lambdas_leaf_only_all (treenode *tn)
{
  treenode *iterator, *leaf;

  assert (tn->parent == NULL);
  for (iterator = tn; (leaf = bow_treenode_iterate_leaves (&iterator)); )
    bow_treenode_set_lambdas_leaf_only (leaf);
}
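/* Illustrative sketch, not part of the original file: one way the loader
   and the lambda initializers above might be combined.  The FILE* argument
   and the decision to start from uniform mixture weights are assumptions
   for the example; only the bow_treenode_* calls are taken from this
   file. */
static treenode *
example_read_tree_with_uniform_lambdas (FILE *fp)
{
  treenode *root = bow_treenode_new_from_fp (fp);

  /* Each node carries DEPTH+2 mixture weights (LAMBDAS), presumably one
     for the node's own estimate, one per ancestor, and one for a uniform
     distribution; spread them evenly at every leaf. */
  bow_treenode_set_lambdas_uniform_all (root);
  return root;
}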
/* Add WEIGHT to treenode TN's record of how much probability mass
   document DI contributed to TN's NEW_WORDS for the word at DI's WVI'th
   word.  This mass can later be subtracted to do leave-one-out
   calculations.  DI_WV_NUM_ENTRIES-1 is the maximum WVI that can be
   expected for DI; DI_COUNT-1 is the maximum DI that can be expected;
   both are used to know how much space to allocate. */
void
bow_treenode_add_new_loo_for_di_wvi (treenode *tn, double weight,
                                     int di, int wvi,
                                     int di_wv_num_entries, int di_count)
{
  int i;

  if (tn->new_di_loo == NULL)
    {
      tn->new_di_loo = bow_malloc (di_count * sizeof (double));
      for (i = 0; i < di_count; i++)
        tn->new_di_loo[i] = 0;
    }
  if (tn->new_di_wvi_loo == NULL)
    {
      tn->new_di_wvi_loo = bow_malloc (di_count * sizeof (void*));
      for (i = 0; i < di_count; i++)
        tn->new_di_wvi_loo[i] = NULL;
    }
  if (tn->new_di_wvi_loo[di] == NULL)
    {
      tn->new_di_wvi_loo[di] = bow_malloc (di_wv_num_entries * sizeof (double));
      for (i = 0; i < di_wv_num_entries; i++)
        tn->new_di_wvi_loo[di][i] = 0;
    }
  tn->new_di_loo[di] += weight;
  tn->new_di_wvi_loo[di][wvi] += weight;
}

/* Clear all LOO info for treenode TN */
void
bow_treenode_free_loo (treenode *tn, int di_count)
{
  int i;

  /* For now, clear by freeing */
  if (tn->di_loo)
    {
      bow_free (tn->di_loo);
      tn->di_loo = NULL;
    }
  if (tn->di_wvi_loo)
    {
      for (i = 0; i < di_count; i++)
        {
          if (tn->di_wvi_loo[i])
            bow_free (tn->di_wvi_loo[i]);
        }
      bow_free (tn->di_wvi_loo);
      tn->di_wvi_loo = NULL;
    }
}

/* Same as above, over all nodes of the tree. */
void
bow_treenode_free_loo_all (treenode *root, int di_count)
{
  int ci;

  bow_treenode_free_loo (root, di_count);
  for (ci = 0; ci < root->children_count; ci++)
    bow_treenode_free_loo_all (root->children[ci], di_count);
}

/* Clear all LOO and NEW_LOO info for treenode TN */
void
bow_treenode_free_loo_and_new_loo (treenode *tn, int di_count)
{
  int i;

  if (tn->di_loo)
    {
      bow_free (tn->di_loo);
      tn->di_loo = NULL;
    }
  if (tn->new_di_loo)
    {
      bow_free (tn->new_di_loo);
      tn->new_di_loo = NULL;
    }
  if (tn->di_wvi_loo)
    {
      for (i = 0; i < di_count; i++)
        {
          if (tn->di_wvi_loo[i])
            bow_free (tn->di_wvi_loo[i]);
        }
      bow_free (tn->di_wvi_loo);
      tn->di_wvi_loo = NULL;
    }
  if (tn->new_di_wvi_loo)
    {
      for (i = 0; i < di_count; i++)
        {
          if (tn->new_di_wvi_loo[i])
            bow_free (tn->new_di_wvi_loo[i]);
        }
      bow_free (tn->new_di_wvi_loo);
      tn->new_di_wvi_loo = NULL;
    }
}

/* Same as above, over all nodes of the tree. */
void
bow_treenode_free_loo_and_new_loo_all (treenode *root, int di_count)
{
  int ci;

  bow_treenode_free_loo_and_new_loo (root, di_count);
  for (ci = 0; ci < root->children_count; ci++)
    bow_treenode_free_loo_and_new_loo_all (root->children[ci], di_count);
}

/* Set the leave-one-out information used for future BOW_TREENODE_PR_WI*()
   calculations from the NEW_*_LOO variables, then clear the NEW_*_LOO
   variables so they are ready for the next round. */
static void
bow_treenode_set_loo_from_new_loo (treenode *tn, int di_count)
{
  bow_treenode_free_loo (tn, di_count);
  tn->di_loo = tn->new_di_loo;
  tn->di_wvi_loo = tn->new_di_wvi_loo;
  tn->new_di_loo = NULL;
  tn->new_di_wvi_loo = NULL;
}
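/* Illustrative sketch, not part of the original file: how the LOO
   bookkeeping above might be fed while distributing a document's words
   over a node.  The posterior weights P, the document index DI, its
   word-vector length WV_LEN and the corpus size DOC_COUNT are
   hypothetical inputs supplied by the caller; only
   bow_treenode_add_new_loo_for_di_wvi is taken from this file. */
static void
example_record_new_loo (treenode *tn, int di, const double *p,
                        int wv_len, int doc_count)
{
  int wvi;

  for (wvi = 0; wvi < wv_len; wvi++)
    /* Remember how much mass document DI contributed to TN for its WVI'th
       word, so it can be subtracted later in leave-one-out estimates. */
    bow_treenode_add_new_loo_for_di_wvi (tn, p[wvi], di, wvi,
                                         wv_len, doc_count);
}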
/* Normalize the NEW_WORDS distribution, move it into the WORDS array and
   zero the NEW_WORDS array.  ALPHA is the parameter for the Dirichlet
   prior. */
void
bow_treenode_set_words_from_new_words (treenode *tn, double alpha)
{
  int wi;
  double total_word_count = 0.0;

  /* A special case for "Misc" nodes: increase their smoothing.
     NOTE: This has no effect if MISC_STAYS_FLAT is non-zero. */
  if (strstr (tn->name, "/Misc/"))
    alpha++;

  /* Calculate the normalizing constant */
  for (wi = 0; wi < tn->words_capacity; wi++)
    total_word_count += tn->new_words[wi];
  total_word_count += alpha * tn->words_capacity;
  //assert (total_word_count);
  if (total_word_count == 0)
    {
      alpha = 1.0 / tn->words_capacity;
      total_word_count = 1.0;
    }
  for (wi = 0; wi < tn->words_capacity; wi++)
    {
      //assert (tn->new_words[wi] > 0);
      assert (tn->new_words[wi] >= 0);
#if !USE_ACCELERATED_EM
#if !MISC_STAYS_FLAT
      tn->words[wi] = (alpha + tn->new_words[wi]) / total_word_count;
#else
      /* A special case for "Misc" nodes: they stay flat */
      if (strstr (tn->name, "/Misc/"))
        tn->words[wi] = 1.0 / tn->words_capacity;
      else
        tn->words[wi] = (alpha + tn->new_words[wi]) / total_word_count;
#endif /* MISC_STAYS_FLAT */
#else
      tn->words[wi] = (((1.0 - EM_ACCELERATION) * tn->words[wi])
                       + (EM_ACCELERATION
                          * (alpha + tn->new_words[wi]) / total_word_count));
      if (tn->words[wi] < 0)
        tn->words[wi] = 0;
#endif /* USE_ACCELERATED_EM */
      assert (tn->words[wi] >= 0);
      assert (tn->words[wi] <= 1);
      tn->new_words[wi] = 0;
    }
#if USE_ACCELERATED_EM
  /* Renormalize after setting some to zero. */
  total_word_count = 0;
  for (wi = 0; wi < tn->words_capacity; wi++)
    total_word_count += tn->words[wi];
  for (wi = 0; wi < tn->words_capacity; wi++)
    tn->words[wi] /= total_word_count;
#endif

  /* Why was this conditioned on MISC_STAYS_FLAT?  The
     bow_treenode_pr_wi_loo_local function doesn't work with the
     new_words_normalizer equal to zero! */
  if (!MISC_STAYS_FLAT || !strstr (tn->name, "/Misc/"))
    tn->new_words_normalizer = total_word_count;
  else
    tn->new_words_normalizer = 0;

  /* Also roll over the LOO information. */
  bow_treenode_set_loo_from_new_loo (tn, crossbow_docs->length);
}

/* Over all nodes of the tree, normalize the NEW_WORDS distribution, move
   it into the WORDS array and zero the NEW_WORDS array. */
void
bow_treenode_set_words_from_new_words_all (treenode *root, double alpha)
{
  int ci;

  bow_treenode_set_words_from_new_words (root, alpha);
  for (ci = 0; ci < root->children_count; ci++)
    bow_treenode_set_words_from_new_words_all (root->children[ci], alpha);
}

/* Set NEW_WORDS counts to zero. */
void
bow_treenode_set_new_words_to_zero (treenode *tn)
{
  int wi;

  for (wi = 0; wi < tn->words_capacity; wi++)
    tn->new_words[wi] = 0;
}

/* Same as above, over all nodes of the tree. */
void
bow_treenode_set_new_words_to_zero_all (treenode *root)
{
  int ci;

  bow_treenode_set_new_words_to_zero (root);
  for (ci = 0; ci < root->children_count; ci++)
    bow_treenode_set_new_words_to_zero_all (root->children[ci]);
}

/* Set the NEW_WORDS distribution from the addition of the WORDS
   distribution and some random noise.  NOISE_WEIGHT 0.5 gives equal weight
   to the data and the noise. */
void
bow_treenode_set_new_words_from_perturbed_words (treenode *tn,
                                                 double noise_weight)
{
  int wi;

  for (wi = 0; wi < tn->words_capacity; wi++)
    tn->new_words[wi] = ((1 - noise_weight) * tn->words[wi]
                         + noise_weight * bow_random_01 () / tn->words_capacity);
}

/* Same as above, over all nodes of the tree. */
void
bow_treenode_set_new_words_from_perturbed_words_all (treenode *root,
                                                     double noise_weight)
{
  int ci;

  bow_treenode_set_new_words_from_perturbed_words (root, noise_weight);
  for (ci = 0; ci < root->children_count; ci++)
    bow_treenode_set_new_words_from_perturbed_words_all (root->children[ci],
                                                         noise_weight);
}
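/* Illustrative sketch, not part of the original file: the Dirichlet
   smoothing that bow_treenode_set_words_from_new_words performs.  With
   expected counts {3, 1, 0} over a three-word vocabulary and ALPHA = 1,
   the normalizer is (3 + 1 + 0) + 1 * 3 = 7, so WORDS becomes
   {4/7, 2/7, 1/7}.  The helper below simply applies that update to every
   node of a tree; the choice of ALPHA = 1 is an assumption for the
   example. */
static void
example_m_step_words (treenode *root)
{
  /* Smooth and normalize NEW_WORDS into WORDS at every node, zero
     NEW_WORDS, and roll the NEW_*_LOO records over for the next round. */
  bow_treenode_set_words_from_new_words_all (root, 1.0);
}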
/* Over all leaves of the tree, set the PRIOR by the results of smoothing
   and normalizing the NEW_PRIOR distribution.  ALPHA is the parameter for
   the Dirichlet prior. */
void
bow_treenode_set_leaf_prior_from_new_prior_all (treenode *root, double alpha)
{
  treenode *iterator, *leaf;
  double prior_sum = 0;

  assert (root->parent == NULL);
  for (iterator = root; (leaf = bow_treenode_iterate_leaves (&iterator)); )
    {
      if (strstr (leaf->name, "/Misc/"))
        {
          /* Arbitrarily give /Misc/ node the same weight as the average of
             the first two children of LEAF's parent. */
          assert (leaf->parent->children_count >= 2);
          leaf->new_prior = (leaf->parent->children[0]->new_prior
                             + leaf->parent->children[1]->new_prior) / 2;
        }
      prior_sum += leaf->new_prior + alpha;
    }
  assert (prior_sum);
  for (iterator = root; (leaf = bow_treenode_iterate_leaves (&iterator)); )
    {
      leaf->prior = (leaf->new_prior + alpha) / prior_sum;
      leaf->new_prior = 0;
    }
}

/* Over all nodes (including interior and root) of the tree, set the PRIOR
   by the results of smoothing and normalizing the NEW_PRIOR distribution.
   ALPHA is the parameter for the Dirichlet prior. */
void
bow_treenode_set_prior_from_new_prior_all (treenode *root, double alpha)
{
  treenode *iterator, *leaf;
  double prior_sum = 0;

  assert (root->parent == NULL);
  for (iterator = root; (leaf = bow_treenode_iterate_all (&iterator)); )
    prior_sum += leaf->new_prior + alpha;
  assert (prior_sum);
  for (iterator = root; (leaf = bow_treenode_iterate_all (&iterator)); )
    {
      leaf->prior = (leaf->new_prior + alpha) / prior_sum;
      leaf->new_prior = 0;
    }
}

/* Over all nodes (including interior and root) of the tree, plus one
   "extra" quantity (intended for the prior probability of the uniform
   distribution), set the PRIOR by the results of smoothing and normalizing
   the NEW_PRIOR distribution, and set EXTRA as part of the normalization.
   ALPHA is the parameter for the Dirichlet prior. */
void
bow_treenode_set_prior_and_extra_from_new_prior_all (treenode *root,
                                                     double *new_extra,
                                                     double *extra,
                                                     double alpha)
{
  treenode *iterator, *leaf;
  double prior_sum = 0;

  assert (root->parent == NULL);
  for (iterator = root; (leaf = bow_treenode_iterate_all (&iterator)); )
    prior_sum += leaf->new_prior + alpha;
  prior_sum += *new_extra + alpha;
  assert (prior_sum);
  for (iterator = root; (leaf = bow_treenode_iterate_all (&iterator)); )
    {
      leaf->prior = (leaf->new_prior + alpha) / prior_sum;
      leaf->new_prior = 0;
    }
  *extra = (*new_extra + alpha) / prior_sum;
  *new_extra = 0;
}

/* Normalize the NEW_LAMBDAS distribution, move it into the LAMBDAS array
   and zero the NEW_LAMBDAS array.  ALPHA is the parameter for the
   Dirichlet prior. */
void
bow_treenode_set_lambdas_from_new_lambdas (treenode *tn, double alpha)