⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 multiclass.c

📁 机器学习作者tom mitchell的书上代码
💻 C
📖 第 1 页 / 共 4 页
字号:
					exclude_cis, exclude_cis_size,					exclude_cis_capacity);    }  return 0;}/* Greedily add classes to CIS in the order of their mixture weight. */intmulticlass_explore_cis_greedy1 (crossbow_doc *doc, 				multiclass_score *scores, int *scores_count,				int scores_capacity,				const int *exclude_cis, int exclude_cis_size,				int exclude_cis_capacity){  int nc = crossbow_root->children_count;  int nm = nc + 2;  double *mixture;  int *cis, cis_size, *local_exclude_cis, local_exclude_cis_size;  int cisi, mi;  double max_mixture;  int max_mi = -1;  int top_mi = -1;  mixture = alloca ((nm) * sizeof(double));  cis = alloca ((nc+2) * sizeof(int));  local_exclude_cis = alloca (exclude_cis_capacity * sizeof(int));  for (cisi = 0; cisi < nc; cisi++)    cis[cisi] = -1;  cis_size =  0;  local_exclude_cis_size = exclude_cis_size;  for (cisi = 0; cisi < exclude_cis_size; cisi++)    local_exclude_cis[cisi] = exclude_cis[cisi];  multiclass_mixture_given_doc (doc, mixture);  for (cisi = 0; cisi < exclude_cis_size; cisi++)    mixture[exclude_cis[cisi]] = -1;  /* Exclude the root and uniform mixtures */  mixture[nc] = mixture[nc+1] = -1;  while (cis_size < MAX_NUM_MIXTURE_CLASSES && cis_size < nc)    {      max_mixture = -1;      max_mi = -1;      for (mi = 0; mi < nc; mi++)	/* Make sure we don't pick a class that has no training data */	if (mixture[mi] > max_mixture && crossbow_root->children[mi]->prior)	  {	    max_mixture = mixture[mi];	    max_mi = mi;	  }      /* If MAX_MI is still -1, then we didn't find any positive 	 mixture weight; they were all excluded or zero from 	 multiclass_mixture_given_doc().  Nothing more to be done;	 pop out of this loop. */      if (max_mi < 0)	break;      for (cisi = 0; cisi < cis_size; cisi++)	assert (cis[cisi] != max_mi);      if (cis_size == 0)	top_mi = max_mi;      assert (mixture[max_mi] > 0);      /* Drive this mixture low so that it will never be max_mi again */      mixture[max_mi] = -1;      assert (max_mi < crossbow_root->children_count);      cis[cis_size++] = max_mi;      qsort (cis, cis_size, sizeof (int), compare_ints);      if (multiclass_cis_scores_index (cis, cis_size, 				       scores, *scores_count) == -1	  && !multiclass_artificially_prune_cis (cis, cis_size))	{	  for (cisi = 0; cisi < MAX_NUM_MIXTURE_CLASSES; cisi++)	    scores[*scores_count].c[cisi] = cis[cisi];	  scores[*scores_count].score = 	    multiclass_log_prob_of_classes_given_doc (cis, cis_size, doc);	  (*scores_count)++;	  assert (*scores_count < scores_capacity);	}    }  if (exclude_cis_size < 5 && exclude_cis_size < exclude_cis_capacity)    {      assert (top_mi >= 0);      local_exclude_cis[local_exclude_cis_size++] = top_mi;      multiclass_explore_cis_greedy1 (doc, scores, scores_count,				      scores_capacity,				      local_exclude_cis, 				      local_exclude_cis_size,				      exclude_cis_capacity);    }  return 0;}intmulticlass_explore_cis_greedy2 (crossbow_doc *doc, 				multiclass_score *scores, int *scores_count,				int scores_capacity,				const int *exclude_cis, int exclude_cis_size,				int exclude_cis_capacity){  int nc = crossbow_root->children_count;  int nm = nc + 2;  double *mixture;  int *cis, cis_size, *local_exclude_cis;  int cisi, mi;  double max_mixture;  double max_mi;  mixture = alloca ((nm) * sizeof(double));  cis = alloca ((nc+2) * sizeof(int));  local_exclude_cis = alloca ((nc+2) * sizeof(int));  for (cisi = 0; cisi < nc; cisi++)    cis[cisi] = -1;  cis_size = 0;  /* Greedily add classes to the mixture one at a time. */  while (cis_size < MAX_NUM_MIXTURE_CLASSES)    {      multiclass_mixture_given_doc_and_partial_cis	(doc, cis, cis_size, exclude_cis, exclude_cis_size, mixture);      max_mixture = -FLT_MAX;      max_mi = -1;      for (mi = 0; mi < nm; mi++)	if (mixture[mi] > max_mixture && mi < nc)	  {	    max_mixture = mixture[mi];	    max_mi = mi;	  }      assert (max_mi >= 0);      cis[cis_size++] = max_mi;      qsort (cis, cis_size, sizeof (int), compare_ints);      if (multiclass_cis_scores_index (cis, cis_size, 				       scores, *scores_count) == -1)	{	  for (cisi = 0; cisi < MAX_NUM_MIXTURE_CLASSES; cisi++)	    scores[*scores_count].c[cisi] = cis[cisi];	  scores[*scores_count].score = 	    multiclass_log_prob_of_classes_given_doc (cis, cis_size, doc);	  (*scores_count)++;	  assert (*scores_count < scores_capacity);	}    }  return 0;}intmulticlass_classify_doc (crossbow_doc *doc, int verbose, FILE *out){  int nc = crossbow_root->children_count;  int *exclude_cis, exclude_cis_size, exclude_cis_capacity;  int *cis, cis_size, cis_capacity, cisi, si, scores_count;  int scores_capacity = nc * nc * nc/2;  multiclass_score *scores, top_score;  exclude_cis_capacity = nc+2;  exclude_cis = alloca (exclude_cis_capacity * sizeof(int));  scores = bow_malloc (scores_capacity * sizeof (multiclass_score));  cis_capacity = nc;  cis = alloca (nc * sizeof(int));  si = 0;  for (cisi = 0; cisi < nc; cisi++)    cis[cisi] = exclude_cis[cisi] = -1;  exclude_cis_size = 0;  cis_size = 0;  scores_count = 0;  multiclass_explore_cis_greedy0 (doc, scores, &scores_count, scores_capacity,				  cis, cis_size, cis_capacity,				  exclude_cis, exclude_cis_size,				  exclude_cis_capacity);  multiclass_explore_cis_greedy1 (doc, scores, &scores_count, scores_capacity,				  exclude_cis, exclude_cis_size,				  exclude_cis_capacity);    assert (scores_count < scores_capacity);  qsort (scores, scores_count, sizeof (multiclass_score), 	 compare_multiclass_scores);  if (verbose)    {      double *mixture = alloca (MAX_NUM_MIXTURES * sizeof (double));      fprintf (out, "%s ", doc->filename);      if (doc->cis_size >= 0)	{	  for (cisi = 0; cisi < doc->cis_size-1; cisi++)	    fprintf (out, "%s,", bow_int2str (crossbow_classnames, 					      doc->cis[cisi]));	  fprintf (out, "%s ", 		   bow_int2str (crossbow_classnames, doc->cis[cisi]));	}      else	fprintf (out, "<unknown> ");      /* Artificially print only the top scores */      for (si = 0; si < 30; si++)	{	  fprintf (out,"%s",bow_int2str(crossbow_classnames,scores[si].c[0]));	  for (cisi = 1; 	       cisi < MAX_NUM_MIXTURE_CLASSES && scores[si].c[cisi] >= 0;	       cisi++)	    fprintf (out, ",%s", 		     bow_int2str (crossbow_classnames, scores[si].c[cisi]));	  fprintf (out, ":%g ", scores[si].score);	  if (verbose < 2)	    break;	}      fprintf (out, "\n");      /* Print the mixture of the winner */      for (cis_size = 0; cis_size < MAX_NUM_MIXTURE_CLASSES; cis_size++)	if (scores[0].c[cis_size] == -1)	  break;      multiclass_mixture_given_cis (scores[0].c, cis_size, mixture);      for (cisi = 0; cisi < cis_size+2; cisi++)	fprintf (out, "%g ", mixture[cisi]);      fprintf (out, "\n");      /* Print a message if the correct mixture was never even tested. */      if (multiclass_cis_scores_index (doc->cis, doc->cis_size,				       scores, scores_count) == -1)	fprintf (out, "Correct class set was not explored in %d sets.\n",		 scores_count);    }  /* Return 1 iff the classification is completely correct. */  top_score = scores[0];  bow_free (scores);  for (cisi = 0; cisi < crossbow_root->children_count; cisi++)    {      if (top_score.c[cisi] == -1 && doc->cis[cisi] == -1)	return 1;      if (top_score.c[cisi] != doc->cis[cisi])	return 0;    }  return 1;}intold2_multiclass_classify_doc (crossbow_doc *doc, int verbose, FILE *out){  int nc = crossbow_root->children_count;  int c1, c2, c3, c4, c5, si, scores_count, cisi, correct;  int this_cis[MAX_NUM_MIXTURE_CLASSES];  int scores_capacity = nc * nc * nc;  multiclass_score *scores, top_score;  /* xxx More than enough space */  scores = bow_malloc (scores_capacity * sizeof (multiclass_score));  si = 0;  for (c1 = 0; c1 < nc; c1++)    {      this_cis[0] = scores[si].c[0] = c1;      this_cis[1] = scores[si].c[1] = -1;      this_cis[2] = scores[si].c[2] = -1;      this_cis[3] = scores[si].c[3] = -1;      this_cis[4] = scores[si].c[4] = -1;      scores[si].score = multiclass_log_prob_of_classes_given_doc	(this_cis, 1, doc);      si++;    }  /* Sort the single-class results, and use these rankings to prune the      number of class combinations we evaluate. */  qsort (scores, si, sizeof (multiclass_score), compare_multiclass_scores);  for (c1 = 0; c1 < nc; c1++)    for (c2 = c1+1; c2 < nc; c2++)      {	this_cis[0] = scores[si].c[0] = c1;	this_cis[1] = scores[si].c[1] = c2;	this_cis[2] = scores[si].c[2] = -1;	this_cis[3] = scores[si].c[3] = -1;	this_cis[4] = scores[si].c[4] = -1;	if (multiclass_cis_is_in_top (this_cis, scores, MAX(nc/3,10)))	  scores[si].score = multiclass_log_prob_of_classes_given_doc	    (this_cis, 2, doc);	else	  scores[si].score = -FLT_MAX;	si++;      }  for (c1 = 0; c1 < nc; c1++)    for (c2 = c1+1; c2 < nc; c2++)      for (c3 = c2+1; c3 < nc; c3++)	{	  this_cis[0] = scores[si].c[0] = c1;	  this_cis[1] = scores[si].c[1] = c2;	  this_cis[2] = scores[si].c[2] = c3;	  this_cis[3] = scores[si].c[3] = -1;	  this_cis[4] = scores[si].c[4] = -1;	  if (multiclass_cis_is_in_top (this_cis, scores, MAX(nc/3,10)))	    scores[si].score = multiclass_log_prob_of_classes_given_doc	      (this_cis, 3, doc);	  else	    scores[si].score = -FLT_MAX;	  si++;	}  if (MAX_NUM_MIXTURE_CLASSES < 4)    goto done_scoring;  for (c1 = 0; c1 < nc; c1++)    for (c2 = c1+1; c2 < nc; c2++)      for (c3 = c2+1; c3 < nc; c3++)	for (c4 = c3+1; c4 < nc; c4++)	  {	    this_cis[0] = scores[si].c[0] = c1;	    this_cis[1] = scores[si].c[1] = c2;	    this_cis[2] = scores[si].c[2] = c3;	    this_cis[3] = scores[si].c[3] = c4;	    this_cis[4] = scores[si].c[4] = -1;	    if (multiclass_cis_is_in_top (this_cis, scores, MAX(nc/3,10)))	      scores[si].score = multiclass_log_prob_of_classes_given_doc		(this_cis, 4, doc);	    else	      scores[si].score = -FLT_MAX;	    si++;	  }  if (MAX_NUM_MIXTURE_CLASSES < 5)    goto done_scoring;  for (c1 = 0; c1 < nc; c1++)    for (c2 = c1+1; c2 < nc; c2++)      for (c3 = c2+1; c3 < nc; c3++)	for (c4 = c3+1; c4 < nc; c4++)	  for (c5 = c4+1; c5 < nc; c5++)	    {	      this_cis[0] = scores[si].c[0] = c1;	      this_cis[1] = scores[si].c[1] = c2;	      this_cis[2] = scores[si].c[2] = c3;	      this_cis[3] = scores[si].c[3] = c4;	      this_cis[4] = scores[si].c[4] = c5;	      if (multiclass_cis_is_in_top (this_cis, scores, MAX(nc/3,10)))		scores[si].score = multiclass_log_prob_of_classes_given_doc		  (this_cis, 5, doc);	      else		scores[si].score = -FLT_MAX;	      si++;	    } done_scoring:  scores_count = si;  assert (scores_count < scores_capacity);  qsort (scores, scores_count, sizeof (multiclass_score), 	 compare_multiclass_scores);  if (verbose)    {      fprintf (out, "%s ", doc->filename);      if (doc->cis_size >= 0)	{	  for (cisi = 0; cisi < doc->cis_size-1; cisi++)	    fprintf (out, "%s,", bow_int2str (crossbow_classnames, 					      doc->cis[cisi]));	  fprintf (out, "%s ", 		   bow_int2str (crossbow_classnames, doc->cis[cisi]));	}      else	fprintf (out, "<unknown> ");      /* Artificially print only the top 20 scores */      scores_count = 20;      for (si = 0; si < scores_count; si++)	{	  fprintf (out, "%s", 		   bow_int2str (crossbow_classnames, scores[si].c[0]));	  if (scores[si].c[1] >= 0)	    fprintf (out, ",%s", 		     bow_int2str (crossbow_classnames, scores[si].c[1]));	  if (scores[si].c[2] >= 0)	    fprintf (out, ",%s", 		     bow_int2str (crossbow_classnames, scores[si].c[2]));	  fprintf (out, ":%g ", scores[si].score);	  if (verbose < 2)	    break;	}      fprintf (out, "\n");    }  top_score = scores[0];  bow_free (scores);  correct = 1;  assert (doc->cis_size <= 3);  if (top_score.c[0] != doc->cis[0])    return 0;  if (doc->cis_size == 1)    return 1;  if (top_score.c[1] != doc->cis[1])    return 0;  if (doc->cis_size == 2)    return 1;  if (top_score.c[2] != doc->cis[2])    return 0;  return 1;}intmulticlass_old_classify_doc (crossbow_doc *doc, int verbose, FILE *out){  bow_wv *wv;  int cisi, ni;  double node_data_prob[crossbow_root->children_count];  double node_data_prob_sum;  double node_membership[crossbow_root->children_count];  bow_score scores[crossbow_root->children_count];  double score_sum;  int wvi;  treenode *node;  score_sum = 0;  for (ni = 0; ni < crossbow_root->children_count; ni++)    scores[ni].weight = 0;  wv = crossbow_wv_at_di (doc->di);  for (wvi = 0; wvi < wv->num_entries; wvi++)    {      node_data_prob_sum = 0;      for (ni = 0; ni < crossbow_root->children_count; ni++)	{	  node = crossbow_root->children[ni];	  node_data_prob[ni] = 	    /* node->prior * */ node->words[wv->entry[wvi].wi];	  node_data_prob_sum += node_data_prob[ni];	}      /* Skip over words with zero probability everywhere */      if (node_data_prob_sum == 0)	continue;      /* Normalize the node data probs, so they are membership probabilities,	 and increment the classification score of each node */      for (ni = 0; ni < crossbow_root->children_count; ni++)	{	  node_membership[ni] = node_data_prob[ni] / node_data_prob_sum;	  scores[ni].weight += node_membership[ni] * wv->entry[wvi].count;	  score_sum += scores[ni].weight;	}    }  /* Normalize the class scores, and fill in DI's */  for (ni = 0; ni < crossbow_root->children_count; ni++)    {      scores[ni].weight /= score_sum;      scores[ni].di = ni;    }  bow_sort_scores (scores, crossbow_root->children_count);  fprintf (out, "%s ", doc->filename);  for (cisi = 0; cisi < doc->cis_size-1; cisi++)    fprintf (out, "%s,", bow_int2str (crossbow_classnames, 				      doc->cis[cisi]));  fprintf (out, "%s ", bow_int2str (crossbow_classnames, doc->cis[cisi]));  for (ni = 0; ni < crossbow_root->children_count; ni++)    {      if (scores[ni].weight <= 0)	break;      fprintf (out, "%s:%g ", 	       bow_int2str (crossbow_classnames, scores[ni].di),	       scores[ni].weight);    }  fprintf (out, "\n");  /* Return non-zero if the top ranked class is among the CIS of this doc */  for (cisi = 0; cisi < doc->cis_size; cisi++)    if (doc->cis[cisi] == scores[0].di)      return 1;  return 0;}crossbow_method multiclass_method ={  "multiclass",  NULL,  multiclass_train,  NULL,  multiclass_classify_doc};void _register_method_multiclass () __attribute__ ((constructor));void _register_method_multiclass (){  bow_method_register_with_name ((bow_method*)&multiclass_method,				 "multiclass", 				 sizeof (crossbow_method),				 NULL);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -