⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rainbow.c

📁 机器学习作者tom mitchell的书上代码
💻 C
📖 第 1 页 / 共 5 页
字号:
    bow_barrel_new_from_printed_barrel_file    (filename, rainbow_arg_state.barrel_printing_format);  /* Combine the documents into class statistics. */  rainbow_class_barrel =     bow_barrel_new_vpc_with_weights (rainbow_doc_barrel);}/* Index each line of ARCHER_ARG_STATE.DIRNAME as if it were a   separate file, named after the line number. Does not deal with labels.*/voidrainbow_index_lines (const char *filename){  static const int max_line_length = 40000;  char *buf;  int n, classindex, nchars;  FILE *fp;  bow_cdoc cdoc;  int di;  char docname[BOW_MAX_WORD_LENGTH];  char classname[BOW_MAX_WORD_LENGTH];  buf = alloca (max_line_length);  rainbow_doc_barrel = bow_barrel_new (0, 0, sizeof (bow_cdoc), NULL);  fp = bow_fopen (filename, "r");  bow_verbosify (bow_progress, "Indexing lines:              ");  di = 0;  while (fgets (buf, max_line_length, fp))    {      if (buf[0] == '%')	continue;      n = sscanf (buf, "%s %d %n", docname, &classindex, &nchars);      assert (n >= 2);      if (classindex < 0)	classindex = 0;      sprintf (classname, "class%d", classindex);      if (!(rainbow_doc_barrel->classnames))	rainbow_doc_barrel->classnames = bow_int4str_new (0);      classindex = bow_str2int (rainbow_doc_barrel->classnames, classname);      cdoc.type = bow_doc_train;      cdoc.class = classindex;      /* Set to one so bow_infogain_per_wi_new() works correctly	 by default. */      cdoc.prior = 1.0f;      assert (cdoc.class >= 0);      cdoc.filename = strdup (docname);      assert (cdoc.filename);      cdoc.class_probs = NULL;      /* Add the CDOC to CDOCS, and determine the "index" of this	 document. */      di = bow_array_append (rainbow_doc_barrel->cdocs, &cdoc);      if (strlen (buf+nchars))	bow_wi2dvf_add_di_text_str (&(rainbow_doc_barrel->wi2dvf), di, 				    buf+nchars, docname);      di++;      if (di % 100 == 0)	bow_verbosify(bow_progress, "\b\b\b\b\b\b%6d", di);    }  fclose (fp);  bow_verbosify (bow_progress, "\n");  /* Combine the documents into class statistics. */  rainbow_class_barrel =     bow_barrel_new_vpc_with_weights (rainbow_doc_barrel);}/* Perform a query. *//* Print the contents of file FILENAME to stdout. */static inline voidprint_file (const char *filename){  FILE *fp;  int byte;  if ((fp = fopen (filename, "r")) == NULL)    bow_error ("Couldn't open file `%s' for reading", filename);  while ((byte = fgetc (fp)) != EOF)    fputc (byte, stdout);  fclose (fp);}int iBrokenPipe = 0;                    /* drapp-2/10 */jmp_buf env;                            /* drapp-2/10 *//* Get some query text, and print its best-matching documents among   those previously indexed.  The number of matching documents is   NUM_HITS_TO_SHOW.  If QUERY_FILENAME is non-null, the query text   will be obtained from that file; otherwise it will be prompted for   and read from stdin. */intrainbow_query (FILE *in, FILE *out){  /* Show as many hits as there are classes. */  int num_hits_to_show;  bow_score *hits;  int actual_num_hits;  int i;  bow_wv *query_wv = NULL;  num_hits_to_show = bow_barrel_num_classes (rainbow_class_barrel);  hits = alloca (sizeof (bow_score) * num_hits_to_show);  /* Commented out for WhizBang --query-server */#if 0  /* (Re)set the weight-setting method, if requested with a `-m' on     the command line. */  /* If we don't have the document barrel, we can't do this... */  if (rainbow_doc_barrel)    {      if (bow_argp_method)	rainbow_doc_barrel->method = (rainbow_method*)bow_argp_method;      else	rainbow_doc_barrel->method = rainbow_default_method;    }  if (bow_prune_vocab_by_infogain_n      && rainbow_doc_barrel)    {      /* Change barrel by removing words with small information gain. */      bow_barrel_keep_top_words_by_infogain	(bow_prune_vocab_by_infogain_n, rainbow_doc_barrel, 	 bow_barrel_num_classes (rainbow_class_barrel));    }  /* Infogain pruning must be done before this vocab_map pruning, because     infogain pruning first unhides all words! */  if (rainbow_arg_state.vocab_map)    {      if (rainbow_doc_barrel)	/* Remove words not in the VOCAB_MAP. */	bow_barrel_prune_words_not_in_map (rainbow_doc_barrel,					   rainbow_arg_state.vocab_map);      if (rainbow_class_barrel)	/* Remove words not in the VOCAB_MAP. */	bow_barrel_prune_words_not_in_map (rainbow_class_barrel,					   rainbow_arg_state.vocab_map);    }  if (rainbow_arg_state.hide_vocab_map      && rainbow_doc_barrel)    {      bow_barrel_prune_words_in_map (rainbow_doc_barrel,				     rainbow_arg_state.hide_vocab_map);    }  /* Re-build the rainbow_class_barrel, if necessary */  /* Make sure that we have the document barrel */  if (rainbow_doc_barrel &&      (rainbow_doc_barrel->method != rainbow_class_barrel->method       || rainbow_arg_state.vocab_map       || rainbow_arg_state.hide_vocab_map       || bow_prune_vocab_by_infogain_n))    {      bow_free_barrel (rainbow_class_barrel);      rainbow_class_barrel = 	bow_barrel_new_vpc_with_weights (rainbow_doc_barrel);    }#endif  /* Get the query text, and create a "word vector" from the query text. */ query_again:  if (rainbow_arg_state.query_filename)    {      FILE *fp;      fp = bow_fopen (rainbow_arg_state.query_filename, "r");      query_wv = bow_wv_new_from_text_fp (fp, 					  rainbow_arg_state.query_filename);      fclose (fp);    }  else    {      if (rainbow_arg_state.what_doing != rainbow_query_serving)	bow_verbosify (bow_quiet, 		       "Type your query text now.  End with a Control-D.\n");      if (feof (in))	clearerr (in);      query_wv = bow_wv_new_from_text_fp (in, NULL);    }  if (query_wv == NULL || query_wv->num_entries == 0)    {      if (rainbow_arg_state.query_filename)	bow_verbosify (bow_quiet, "No query text found in `%s'.\n", 		       rainbow_arg_state.query_filename);      else	if (rainbow_arg_state.what_doing != rainbow_query_serving)	  bow_verbosify (bow_quiet, "No query text found.");	else	  {	    fprintf(out, ".\n");            if ( sigsetjmp(env, 0) == 0 )                /* drapp-2/10 */	      {		fflush(out);	      }	    else	      {		iBrokenPipe = 1;                           /* drapp-2/10 */	      }	  }      if (rainbow_arg_state.repeat_query)	bow_verbosify (bow_progress, "  Stopping query repeat\n");      return 0;    }  /* Remove words not in the class_barrel */  bow_wv_prune_words_not_in_wi2dvf (query_wv, rainbow_class_barrel->wi2dvf);#if 0  /* Print the WV, just for debugging */  bow_wv_fprintf (stderr, query_wv);  fflush (stderr);#endif  /* Get the best matching documents. */  /* When using vpc-only, we should use a method that specifies weight   * and normalization functions which do not use the doc barrel */#if 0  if (rainbow_doc_barrel)    {      bow_wv_set_weights (query_wv, rainbow_doc_barrel);      bow_wv_normalize_weights (query_wv, rainbow_doc_barrel);    }  else#endif    {      bow_wv_set_weights (query_wv, rainbow_class_barrel);      bow_wv_normalize_weights (query_wv, rainbow_class_barrel);    }        actual_num_hits = bow_barrel_score  (rainbow_class_barrel, query_wv,				       hits, num_hits_to_show, -1);  bow_free (query_wv);  /* Print them. */  if (rainbow_arg_state.what_doing != rainbow_query_serving)    fprintf (out, "\n");  for (i = 0; i < actual_num_hits; i++)    {      bow_cdoc *cdoc = bow_array_entry_at_index (rainbow_class_barrel->cdocs, 						 hits[i].di);      if (strlen (rainbow_arg_state.output_filename))	{	  char buf[1024];	  strcpy (buf, cdoc->filename);	  strcat (buf, "/");	  strcat (buf, rainbow_arg_state.output_filename);	  print_file (buf);	}      else	{	  /* For the sake CommonLisp, don't print numbers smaller than	     1e-35, because it can't `(read)' them. */	  if (rainbow_arg_state.use_lisp_score_truncation	      && hits[i].weight < 1e-35	      && hits[i].weight > 0)	    hits[i].weight = 0;	  fprintf (out, "%s %.*g\n", 		   /* cdoc->filename,*/		   /* When knn runs, CDOCS entries correspond to documents		    * rather than classes.  We want to print class names. */		   bow_int2str (rainbow_class_barrel->classnames, hits[i].di),		   bow_score_print_precision, hits[i].weight);	}    }  if (rainbow_arg_state.what_doing == rainbow_query_serving)    fprintf(out, ".\n");  if ( sigsetjmp(env, 0) == 0 )                         /* drapp-2/10 */    {      fflush(out);    }  else    {      iBrokenPipe = 1;                                   /* drapp-2/10 */    }  if (rainbow_arg_state.repeat_query)    goto query_again;  return actual_num_hits;}void SigPipeHandler( int iParm )                       /* drapp-2/10 */{  bow_verbosify (bow_progress, "Broken Pipe.\n");  siglongjmp( env, 1 );}voidrainbow_socket_init (const char *socket_name, int use_unix_socket){  int servlen, type, bind_ret;  struct sockaddr_in in_addr;  struct sockaddr *sap;  type = use_unix_socket ? AF_UNIX : AF_INET;     rainbow_sockfd = socket(type, SOCK_STREAM, 0);  assert(rainbow_sockfd >= 0);  if (type == AF_UNIX)    {#ifdef WINNT      servlen = 0;  /* so that the compiler is happy */      sap = 0;      assert(WINNT == 0);#else /* !WINNT */      struct sockaddr_un un_addr;      sap = (struct sockaddr *)&un_addr;      bzero((char *)sap, sizeof(un_addr));      strcpy(un_addr.sun_path, socket_name);      servlen = strlen(un_addr.sun_path) + sizeof(un_addr.sun_family) + 1;#endif /* WINNT */    }  else    {      sap = (struct sockaddr *)&in_addr;      bzero((char *)sap, sizeof(in_addr));      in_addr.sin_port = htons(atoi(socket_name));      in_addr.sin_addr.s_addr = htonl(INADDR_ANY);      servlen = sizeof(in_addr);    }  sap->sa_family = type;       bind_ret = bind(rainbow_sockfd, sap, servlen);  assert(bind_ret >= 0);  listen(rainbow_sockfd, 5);}voidrainbow_serve (){  int newsockfd, clilen;  struct sockaddr cli_addr;  FILE *in, *out;  pid_t pid;  iBrokenPipe = 0;                              /* drapp-2/10 */  clilen = sizeof(cli_addr);  bow_verbosify (bow_progress, "Waiting for connection...\n");  newsockfd = accept(rainbow_sockfd, &cli_addr, &clilen);  assert(newsockfd >= 0);  in = fdopen(newsockfd, "r");  out = fdopen(newsockfd, "w");  if (rainbow_arg_state.forking_server)    {      if ((pid = fork ()) != 0)	{	  /* Parent - return to server mode */	  fclose (in);	  fclose (out);	  close (newsockfd);	  return;	}    }    bow_verbosify (bow_progress, "Got connection.\n");  while (!feof(in) && !iBrokenPipe)             /* drapp-2/10 */    rainbow_query(in, out);  fclose(in);  fclose(out);  close(newsockfd);  bow_verbosify (bow_progress, "Closed connection.\n");  /* Kill the child - don't want it hanging around, sucking up memory */  if (rainbow_arg_state.forking_server)    exit (0);}#if RAINBOW_LISP/* Setup rainbow so that we can do our lisp interface. */voidrainbow_lisp_setup (char *datadirname){  /* Defined in deflexer.c */  extern void _bow_default_lexer_init ();  /* Defined in naivebayes.c */  extern void _register_method_crossentropy ();  extern void _register_method_naivebayes ();  /* Defined in tfidf.c */  extern void _register_method_tfidf_words ();  extern void _register_method_tfidf_log_words ();  extern void _register_method_tfidf_log_occur ();  /* Defined in prind.c */  extern void _register_method_prind ();  extern void _register_method_svm ();  char *dirname = bow_malloc (strlen (datadirname) + 1);  int argc;  static char *argv[] = {    "rainbow-lisp-interface",    "-q",    "-H",    "-h",    "-s",

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -