⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 arrow.c

📁 在Linux下处理英语文本分类
💻 C
📖 第 1 页 / 共 2 页
字号:
		     bow_int2word (wv1->entry[wvi1].wi),		     score);	}    }  printf ("%g\n", score);}voidarrow_socket_init (const char *socket_name, int use_unix_socket){  int servlen, type, bind_ret;  struct sockaddr_in in_addr;  struct sockaddr *sap;  type = use_unix_socket ? AF_UNIX : AF_INET;     arrow_sockfd = socket (type, SOCK_STREAM, 0);  assert (arrow_sockfd >= 0);  if (type == AF_UNIX)    {#ifdef WINNT      servlen = 0;  /* so that the compiler is happy */      sap = 0;      assert(WINNT == 0);#else /* !WINNT */      struct sockaddr_un un_addr;      sap = (struct sockaddr *)&un_addr;      bzero ((char *)sap, sizeof (un_addr));      strcpy (un_addr.sun_path, socket_name);      servlen = strlen (un_addr.sun_path) + sizeof(un_addr.sun_family) + 1;#endif /* WINNT */    }  else    {      sap = (struct sockaddr *)&in_addr;      bzero ((char *)sap, sizeof (in_addr));      in_addr.sin_port = htons (atoi (socket_name));      in_addr.sin_addr.s_addr = htonl (INADDR_ANY);      servlen = sizeof (in_addr);    }  sap->sa_family = type;       bind_ret = bind (arrow_sockfd, sap, servlen);  assert (bind_ret >= 0);  bow_verbosify (bow_progress, "Listening on port %d\n", atoi (socket_name));  listen (arrow_sockfd, 5);}/* We assume that commands are no longer than 1024 characters in length *//* At the moment, we assume that the only possible command is ",HITS <num>" */voidarrow_process_commands (FILE *fd, int *num_hits){  int first;  char buf[1024];  /* checks the first character of the line */  while ((first = fgetc(fd)))    {      if (first != ',')      {        ungetc (first, fd);        return;      }      /* retrieves the rest of the line */      fgets ((char *) buf, 1024, fd);      sscanf (buf, "HITS %d", num_hits);    }}voidarrow_serve (){  int newsockfd, clilen;  struct sockaddr cli_addr;  FILE *in, *out;  int num_hits_to_show;  int pid;   clilen = sizeof (cli_addr);  newsockfd = accept (arrow_sockfd, &cli_addr, &clilen);    if (newsockfd == -1)    bow_error ("Not able to accept connections!\n");  bow_verbosify (bow_progress, "Accepted connection\n");  if (arrow_arg_state.serve_with_forking)    {      if ((pid = fork()) != 0)      {        /* parent - return to server mode */        close (newsockfd);        return;      }    }  assert(newsockfd >= 0);  in = fdopen (newsockfd, "r");  out = fdopen (newsockfd, "w");  /* Get the number of hits to show */  num_hits_to_show = arrow_arg_state.num_hits_to_show;  bow_verbosify (bow_progress, "Processing special commands...\n");  /* Strips any special commands from the beginning of the stream */  arrow_process_commands (in, &num_hits_to_show);  bow_verbosify (bow_progress, "Processing query...\n");  while (!feof(in))    arrow_query (in, out, num_hits_to_show);  fclose(in);  fclose(out);  close(newsockfd);  bow_verbosify (bow_progress, "Closed connection:");   /* Kill the child - don't want it hanging around, sucking up memory :) */  if (arrow_arg_state.serve_with_forking)    exit(0);}/* Beware of quickly written spaghetti code! */voidarrow_serve2 (){  int newsockfd, clilen;  struct sockaddr cli_addr;  FILE *in, *out;  int num_hits_to_show, actual_num_hits;  int pid;  char cmdbuf[128];  char filename[256];  char *query;  bow_wv *query_wv;  bow_score *hits;  int hi;  hits = alloca (sizeof (bow_score) * arrow_barrel->cdocs->length);   clilen = sizeof (cli_addr);  newsockfd = accept (arrow_sockfd, &cli_addr, &clilen);    if (newsockfd == -1)    bow_error ("Not able to accept connections!\n");  bow_verbosify (bow_progress, "Accepted connection\n");  if (arrow_arg_state.serve_with_forking)    {      if ((pid = fork()) != 0)      {        /* parent - return to server mode */        close (newsockfd);        return;      }    }  assert(newsockfd >= 0);  in = fdopen (newsockfd, "r");  out = fdopen (newsockfd, "w");  /* Read in the first word from the input.  It is expected to be a     command: either "rank" or "query". */ again:  if (fscanf (in, "%s", cmdbuf) != 1)    goto done;  fprintf (stderr, "Doing command `%s'\n", cmdbuf);  if (strcmp ("query", cmdbuf) == 0)    {      filename[0] = '\0';      fscanf (in, "%a[^\r\n]", &query);      fprintf (stderr, "Got query `%s'\n", query);#if 0      fprintf (stderr, "`query' command not yet handled!\n");      free (query);      goto again;#endif    }  else if (strcmp ("rank", cmdbuf) == 0)    {      fscanf (in, "%s", filename);      fscanf (in, "%a[^\r\n]", &query);      fprintf (stderr, "Got filename `%s'\n", filename);      fprintf (stderr, "Got query `%s'\n", query);    }  else if (strcmp ("quit", cmdbuf) == 0)    {      goto done;    }  else if (strcmp ("help", cmdbuf) == 0)    {      fprintf (out, "<?xml version='1.0' encoding='US-ASCII' ?>\n"	       "<arrow-result><help>\n"	       "   Commands available to you\n"	       "   help                      print this message\n"	       "   rank <filename> <query>   give rank of <filename> <query>'s results\n"	       "   query <query>             search for <str>\n"	       "</help></arrow-result>\n.\n");      fflush (out);      fscanf (in, "%a[^\r\n]", &query);      free (query);      goto again;    }  else    {      bow_verbosify (bow_progress, "Unrecognized command `%s'.  "		     "Closing connection.\n",		     cmdbuf);      goto done;    }  /* Create a word vector from the query string. */  query_wv = bow_wv_new_from_text_string (query);  if (query_wv == NULL)    {      actual_num_hits = 0;      goto print;    }  fprintf (stderr, "Query WV has length %d\n", query_wv->num_entries);  free (query);  if (filename[0])    num_hits_to_show = arrow_barrel->cdocs->length;  else    num_hits_to_show = arrow_arg_state.num_hits_to_show;  bow_wv_set_weights (query_wv, arrow_barrel);  /* If none of the words have a non-zero IDF, just return zero. */  if (bow_wv_weight_sum (query_wv) == 0)    {      actual_num_hits = 0;      goto print;    }  bow_wv_normalize_weights (query_wv, arrow_barrel);  /* Get the best matching documents. */  actual_num_hits = bow_barrel_score (arrow_barrel, query_wv,				      hits, num_hits_to_show, -1); print:  fprintf (stderr, "Got %d hits\n", actual_num_hits);  if (filename[0])    {      /* Handle a "rank" command */      int rank, hi;      bow_cdoc *cdoc;      int count = actual_num_hits;      for (rank = -1, hi = 0; hi < count; hi++)	{	  cdoc = bow_array_entry_at_index (arrow_barrel->cdocs, hits[hi].di);	  if (strcmp (cdoc->filename, filename) == 0)	    {	      rank = hi;	      break;	    }	}      fprintf (out, "<?xml version='1.0' encoding='US-ASCII' ?>\n" 	       "<arrow-result>\n" 	       "<rank-result>\n" 	       "  <count>%d</count>\n",  	       count);      if (rank != -1)	fprintf (out, "  <rank>%d</rank>\n", rank);      fprintf (out, "</rank-result>\n"	       "</arrow-result>\n.\n");    }  else    {      /* Handle a "query" command */      fprintf (out, "<?xml version='1.0' encoding='US-ASCII' ?>\n"	       "<arrow-result>\n"	       "<hitlist>\n"	       "<count>%d</count>\n", 	       actual_num_hits);      for (hi = 0; hi < actual_num_hits; hi++)	{	  fprintf (out, 		   "<hit>\n"		   "   <id>%d</id>\n"		   "   <name>%s</name>\n"		   "   <score>%g</score>\n"		   "</hit>\n",		   hits[hi].di, hits[hi].name, hits[hi].weight);	}    }  fflush (out);  for (hi = 0; hi < actual_num_hits; hi++)    if (hits[hi].name)      bow_free ((void*)hits[hi].name);  /* Handle another query */  goto again; done:  fclose(in);  fclose(out);  close(newsockfd);  bow_verbosify (bow_progress, "Closed connection\n");   /* Kill the child - don't want it hanging around, sucking up memory :) */  if (arrow_arg_state.serve_with_forking)    exit(0);}voidarrow_coo (){  int wi;  bow_wi2dvf *wicoo;  int num_hides;  num_hides = bow_wi2dvf_hide_words_by_doc_count (arrow_barrel->wi2dvf, 6);  bow_verbosify (bow_progress, "%d words hidden\n", num_hides);  wicoo = (bow_wi2dvf*) bow_wicoo_from_barrel (arrow_barrel);#define PRINT_WORD_PROBS 1#if PRINT_WORD_PROBS  {    bow_dv *dv;    printf ("Word probabilities:\n");    for (wi = 0; wi < wicoo->size; wi++)      {	dv = bow_wi2dvf_dv (wicoo, wi);	if (dv) 	  printf ("_uniform %-12.7f %s\n", dv->idf, bow_int2word (wi));      }  }#endif /* PRINT_WORD_PROBS */  for (wi = 0; wi < bow_num_words (); wi++)    {      /* printf ("%s  new word\n", bow_int2word (wi)); */      bow_wicoo_print_word_entropy (wicoo, wi);    }}/* The main() function. */intmain (int argc, char *argv[]){  /* Prevents zombie children in System V environments */  signal (SIGCHLD, SIG_IGN);  /* Default command-line argument values */  arrow_arg_state.num_hits_to_show = 10;  arrow_arg_state.what_doing = arrow_indexing;  arrow_arg_state.query_filename = NULL;  arrow_arg_state.serve_with_forking = 0;  /* Parse the command-line arguments. */  argp_parse (&arrow_argp, argc, argv, 0, 0, &arrow_arg_state);  if (arrow_arg_state.what_doing == arrow_indexing)    {      if (arrow_index (argc, argv))	arrow_archive ();      else	bow_error ("No text documents found.");    }  else    {      arrow_unarchive ();#if 0      /* xxx */      arrow_barrel->method = &bow_method_tfidf;      bow_barrel_set_weights (arrow_barrel);      bow_barrel_normalize_weights (arrow_barrel);#endif      if (arrow_arg_state.what_doing == arrow_querying)	{	  arrow_query (stdin, stdout, arrow_arg_state.num_hits_to_show);	}      else if (arrow_arg_state.what_doing == arrow_comparing)	{	  bow_wv *query_wv;	  bow_wv *compare_wv;	  FILE *fp;	  /* The user must specify the query filename on the command line.	     In this case it is not optional. */	  assert (arrow_arg_state.query_filename);	  /* Make word vectors from the files. */	  fp = bow_fopen (arrow_arg_state.query_filename, "r");	  query_wv = bow_wv_new_from_text_fp (fp,					      arrow_arg_state.query_filename);	  fclose (fp);	  fp = bow_fopen (arrow_arg_state.compare_filename, "r");	  compare_wv = bow_wv_new_from_text_fp	    (fp, arrow_arg_state.compare_filename);	  fclose (fp);	  arrow_compare (query_wv, compare_wv);	}      else if (arrow_arg_state.what_doing == arrow_printing_idf)	{	  int wi;	  int max_wi = MIN (arrow_barrel->wi2dvf->size, bow_num_words());	  bow_dv *dv;	  for (wi = 0; wi < max_wi; wi++)	    {	      dv = bow_wi2dvf_dv (arrow_barrel->wi2dvf, wi);	      if (dv)		printf ("%9f %s\n", dv->idf, bow_int2word (wi));	    }	}      else if (arrow_arg_state.what_doing == arrow_query_serving)	{	  arrow_socket_init (arrow_arg_state.server_port_num, 0);	  if (arrow_arg_state.serve_with_forking)	    {	      /*	      int wi;	      bow_dv *dv;	      */	      /* Touch all DV's so we read them into memory before forking */	      /* This is *very bad* unless you are dealing with a small	       * model or need maximum performance! */	      /*	      for (wi = 0; wi < arrow_barrel->wi2dvf->size; wi++)		dv = bow_wi2dvf_dv (arrow_barrel->wi2dvf, wi);	      */	    }	  while (1)	    arrow_serve2 ();	}      else if (arrow_arg_state.what_doing == arrow_printing_coo)	{	  arrow_coo ();	}      else	bow_error ("Internal error");    }  exit (0);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -