⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 barrel.c

📁 良好的代码实现
💻 C
📖 第 1 页 / 共 2 页
字号:
		 "Removing words by information gain:          ");  /* Hide words from the BARREL. */  for (i = num_words_to_keep; i < wi2ig_size; i++)    {      /* Hide the WI from BARREL. */      bow_wi2dvf_hide_wi (barrel->wi2dvf, wiig_list[i].wi);      if (i % 100 == 0)	bow_verbosify (bow_progress, "\b\b\b\b\b\b\b\b\b%9d", wi2ig_size - i);     }      bow_verbosify (bow_progress, "\n");}int_bow_barrel_cdoc_write (bow_cdoc *cdoc, FILE *fp){  int ret;  ret = bow_fwrite_int (cdoc->type, fp);  ret += bow_fwrite_float (cdoc->normalizer, fp);  ret += bow_fwrite_float (cdoc->prior, fp);  ret += bow_fwrite_int (cdoc->word_count, fp);  ret += bow_fwrite_string (cdoc->filename, fp);  if (bow_file_format_version < 5)    ret += bow_fwrite_short (cdoc->class, fp);  else    ret += bow_fwrite_int (cdoc->class, fp);  return ret;}int_bow_barrel_cdoc_read (bow_cdoc *cdoc, FILE *fp){  int ret;  int type;  ret = bow_fread_int (&type, fp);  cdoc->type = type;  ret += bow_fread_float (&(cdoc->normalizer), fp);  ret += bow_fread_float (&(cdoc->prior), fp);  ret += bow_fread_int (&(cdoc->word_count), fp);  ret += bow_fread_string ((char**)&(cdoc->filename), fp);  if (bow_file_format_version < 5)    {      short s;      ret += bow_fread_short (&s, fp);      cdoc->class = s;    }  else    ret += bow_fread_int (&(cdoc->class), fp);  return ret;}/* Create and return a `barrel' by reading data from the file-pointer FP. 
*/bow_barrel *bow_barrel_new_from_data_fp (FILE *fp){  bow_barrel *ret;  int version_tag;  int method_id;  version_tag = fgetc (fp);  /* xxx assert (version_tag >= 0); */  if (version_tag <= 0)    return NULL;  if (_bow_barrel_version != -1 && _bow_barrel_version != version_tag)    bow_error ("Trying to read bow_barrel's with different version numbers");  _bow_barrel_version = version_tag;  ret = bow_malloc (sizeof (bow_barrel));  if (_bow_barrel_version < 3)    {      bow_fread_int (&method_id, fp);      ret->method = _old_bow_methods[method_id];    }  else    {      char *method_string;      bow_fread_string (&method_string, fp);      ret->method = bow_method_at_name (method_string);      free (method_string);    }  ret->cdocs =     bow_array_new_from_data_fp ((int(*)(void*,FILE*))_bow_barrel_cdoc_read,				 _bow_barrel_cdoc_free, fp);  assert (ret->cdocs->length);  ret->wi2dvf = bow_wi2dvf_new_from_data_fp (fp);  assert (ret->wi2dvf->num_words);  return ret;}/* Decide whether to keep this or not.  Currently it it used by   rainbow-h.c. */bow_barrel *bow_barrel_new_from_data_file (const char *filename){  FILE *fp;  bow_barrel *ret_barrel;  int wi;  bow_dv *dv;  int dv_count = 0;  fp = bow_fopen (filename, "r");  ret_barrel = bow_barrel_new_from_data_fp (fp);  if (ret_barrel)    {      /* Read in all the dvf's so that we can close the FP. */      for (wi = 0; wi < ret_barrel->wi2dvf->size; wi++)	{	  dv = bow_wi2dvf_dv (ret_barrel->wi2dvf, wi);	  if (dv)	    dv_count++;	}      ret_barrel->wi2dvf->fp = NULL;      assert (dv_count);    }  fclose (fp);  return ret_barrel;}/* Write BARREL to the file-pointer FP in a machine independent format. 
*/voidbow_barrel_write (bow_barrel *barrel, FILE *fp){  if (!barrel)    {      fputc (0, fp);		/* 0 version_tag means NULL barrel */      return;    }  fputc (BOW_DEFAULT_BARREL_VERSION, fp);  _bow_barrel_version = BOW_DEFAULT_BARREL_VERSION;  bow_fwrite_string (barrel->method->name, fp);  bow_array_write (barrel->cdocs,		   (int(*)(void*,FILE*))_bow_barrel_cdoc_write, fp);  bow_wi2dvf_write (barrel->wi2dvf, fp);}/* Print barrel to FP in human-readable and awk-accessible format. */voidbow_barrel_printf (bow_barrel *barrel, FILE *fp, const char *format){    bow_dv_heap *heap;		/* a heap of "document vectors" */  int current_di;  bow_cdoc *cdoc;  bow_verbosify (bow_progress, "Printing barrel:          ");  heap = bow_make_dv_heap_from_wi2dvf (barrel->wi2dvf);  /* Keep going until the heap is empty */  while (heap->length > 0)    {      /* Set the current document we're working on */      current_di = heap->entry[0].current_di;      assert (heap->entry[0].dv->idf == heap->entry[0].dv->idf);  /* NaN */      if (current_di % 10 == 0)	bow_verbosify (bow_progress, "\b\b\b\b\b\b%6d", current_di);      /* Here we should check if this di is part of some training set and	 move on if it isn't. */          /* Get the document */      cdoc = bow_cdocs_di2doc (barrel->cdocs, current_di);#if 0      /* If it's not a model document, then move on to next one */      if (cdoc->type != model)	{	  do 	    {	      bow_dv_heap_update (heap);	    }	  while ((current_di == heap->entry[0].current_di)		 && (heap->length > 0));	  	  /* Try again */	  continue;	}#endif      fprintf (fp, "%s", cdoc->filename);          /* Loop over all words in this document, printing out the         FORMAT-requested statistics. 
*/      do 	{#if 0	  int wi;	  for (wi = 0; heap->entry[0].wi > wi; wi++)	    fprintf (fp, " 0");#endif	  fprintf (fp, "  %s %d %d", 		   bow_int2word (heap->entry[0].wi),		   heap->entry[0].wi,		   heap->entry[0].dv->entry[heap->entry[0].index].count);	  /* Update the heap, we are done with this di, move it to its	     new position */	  bow_dv_heap_update (heap);#if 0	  for (; heap->entry[0].wi > wi; wi++)	    fprintf (fp, " 0");#endif	}       while ((current_di == heap->entry[0].current_di)	     && (heap->length > 0));      fprintf (fp, "\n");    }  bow_free (heap);  bow_verbosify (bow_progress, "\n"); }/* Print barrel to FP in human-readable and awk-accessible format.   Step through each CDOC in BARREL->CDOCS instead of using a heap.     This way we even print out the documents that have zero words.    This function runs much more slowly than the one above. */voidbow_new_slow_barrel_printf (bow_barrel *barrel, FILE *fp, const char *format){  int di;  bow_cdoc *cdoc;  bow_de *de;  int wi, max_wi;  bow_verbosify (bow_progress, "Printing barrel:          ");  max_wi = barrel->wi2dvf->size;  for (di = 0; di < barrel->cdocs->length; di++)    {      if (barrel->cdocs->length - di % 10 == 0)	bow_verbosify (bow_progress, "\b\b\b\b\b\b%6d", 		       barrel->cdocs->length - di);      cdoc = bow_array_entry_at_index (barrel->cdocs, di);      fprintf (fp, "%s", cdoc->filename);      for (wi = 0; wi < max_wi; wi++)	{	  de = bow_wi2dvf_entry_at_wi_di (barrel->wi2dvf, wi, di);	  if (de)	    fprintf (fp, "  %s %d %d", 		     bow_int2word (wi),		     wi,		     de->count);	}      fprintf (fp, "\n");    }  bow_verbosify (bow_progress, "\n"); }/* Print on stdout the number of times WORD occurs in the various   docs/classes of BARREL. 
*/
/* One line is printed per entry of WORD's document vector, in the form
   "COUNT / WORD_COUNT  (RATIO) FILENAME", where WORD_COUNT and FILENAME
   come from the matching cdoc and RATIO is COUNT / WORD_COUNT computed
   in floating point.

   If WORD is not in the dictionary a message is written to stderr and
   the process exits with status -1.  If WORD has no document vector in
   BARREL a message is written to stderr and the function returns. */
void
bow_barrel_print_word_count (bow_barrel *barrel, const char *word)
{
  int wi;
  bow_dv *dv;
  int dvi;
  bow_cdoc *cdoc;

  wi = bow_word2int (word);
  if (wi == -1)
    {
      fprintf (stderr, "No such word `%s' in dictionary\n", word);
      exit (-1);
    }

  dv = bow_wi2dvf_dv (barrel->wi2dvf, wi);
  if (!dv)
    {
      fprintf (stderr, "No document vector for word `%s'\n", word);
      return;
    }

  for (dvi = 0; dvi < dv->length; dvi++)
    {
      /* Look up the document this entry refers to. */
      cdoc = bow_array_entry_at_index (barrel->cdocs,
                                       dv->entry[dvi].di);
      printf ("%9d / %9d  (%9.5f) %s\n",
              dv->entry[dvi].count,
              cdoc->word_count,
              ((float)dv->entry[dvi].count / cdoc->word_count),
              cdoc->filename);
    }
}

/* Free the memory held by BARREL: its wi2dvf and its cdocs array (each
   only when non-NULL), and then the barrel structure itself.

   A NULL BARREL is accepted and ignored.  NULL barrels are legal values
   elsewhere in this file: bow_barrel_new_from_data_fp() can return NULL
   and bow_barrel_write() accepts NULL. */
void
bow_barrel_free (bow_barrel *barrel)
{
  if (!barrel)
    return;

  if (barrel->wi2dvf)
    bow_wi2dvf_free (barrel->wi2dvf);
  if (barrel->cdocs)
    bow_array_free (barrel->cdocs);
  bow_free (barrel);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -