📄 arrow.c
字号:
bow_int2word (wv1->entry[wvi1].wi), score); } } printf ("%g\n", score);}voidarrow_socket_init (const char *socket_name, int use_unix_socket){ int servlen, type, bind_ret; struct sockaddr_in in_addr; struct sockaddr *sap; type = use_unix_socket ? AF_UNIX : AF_INET; arrow_sockfd = socket (type, SOCK_STREAM, 0); assert (arrow_sockfd >= 0); if (type == AF_UNIX) {#ifdef WINNT servlen = 0; /* so that the compiler is happy */ sap = 0; assert(WINNT == 0);#else /* !WINNT */ struct sockaddr_un un_addr; sap = (struct sockaddr *)&un_addr; bzero ((char *)sap, sizeof (un_addr)); strcpy (un_addr.sun_path, socket_name); servlen = strlen (un_addr.sun_path) + sizeof(un_addr.sun_family) + 1;#endif /* WINNT */ } else { sap = (struct sockaddr *)&in_addr; bzero ((char *)sap, sizeof (in_addr)); in_addr.sin_port = htons (atoi (socket_name)); in_addr.sin_addr.s_addr = htonl (INADDR_ANY); servlen = sizeof (in_addr); } sap->sa_family = type; bind_ret = bind (arrow_sockfd, sap, servlen); assert (bind_ret >= 0); bow_verbosify (bow_progress, "Listening on port %d\n", atoi (socket_name)); listen (arrow_sockfd, 5);}/* We assume that commands are no longer than 1024 characters in length *//* At the moment, we assume that the only possible command is ",HITS <num>" */voidarrow_process_commands (FILE *fd, int *num_hits){ int first; char buf[1024]; /* checks the first character of the line */ while ((first = fgetc(fd))) { if (first != ',') { ungetc (first, fd); return; } /* retrieves the rest of the line */ fgets ((char *) buf, 1024, fd); sscanf (buf, "HITS %d", num_hits); }}voidarrow_serve (){ int newsockfd, clilen; struct sockaddr cli_addr; FILE *in, *out; int num_hits_to_show; int pid; clilen = sizeof (cli_addr); newsockfd = accept (arrow_sockfd, &cli_addr, &clilen); if (newsockfd == -1) bow_error ("Not able to accept connections!\n"); bow_verbosify (bow_progress, "Accepted connection\n"); if (arrow_arg_state.serve_with_forking) { if ((pid = fork()) != 0) { /* parent - return to server mode */ close (newsockfd); return; } } assert(newsockfd >= 0); in = fdopen (newsockfd, "r"); out = fdopen (newsockfd, "w"); /* Get the number of hits to show */ num_hits_to_show = arrow_arg_state.num_hits_to_show; bow_verbosify (bow_progress, "Processing special commands...\n"); /* Strips any special commands from the beginning of the stream */ arrow_process_commands (in, &num_hits_to_show); bow_verbosify (bow_progress, "Processing query...\n"); while (!feof(in)) arrow_query (in, out, num_hits_to_show); fclose(in); fclose(out); close(newsockfd); bow_verbosify (bow_progress, "Closed connection:"); /* Kill the child - don't want it hanging around, sucking up memory :) */ if (arrow_arg_state.serve_with_forking) exit(0);}/* Beware of quickly written spaghetti code! */voidarrow_serve2 (){ int newsockfd, clilen; struct sockaddr cli_addr; FILE *in, *out; int num_hits_to_show, actual_num_hits; int pid; char cmdbuf[128]; char filename[256]; char *query; bow_wv *query_wv; bow_score *hits; int hi; hits = alloca (sizeof (bow_score) * arrow_barrel->cdocs->length); clilen = sizeof (cli_addr); newsockfd = accept (arrow_sockfd, &cli_addr, &clilen); if (newsockfd == -1) bow_error ("Not able to accept connections!\n"); bow_verbosify (bow_progress, "Accepted connection\n"); if (arrow_arg_state.serve_with_forking) { if ((pid = fork()) != 0) { /* parent - return to server mode */ close (newsockfd); return; } } assert(newsockfd >= 0); in = fdopen (newsockfd, "r"); out = fdopen (newsockfd, "w"); /* Read in the first word from the input. It is expected to be a command: either "rank" or "query". */ again: if (fscanf (in, "%s", cmdbuf) != 1) goto done; fprintf (stderr, "Doing command `%s'\n", cmdbuf); if (strcmp ("query", cmdbuf) == 0) { filename[0] = '\0'; fscanf (in, "%a[^\r\n]", &query); fprintf (stderr, "Got query `%s'\n", query);#if 0 fprintf (stderr, "`query' command not yet handled!\n"); free (query); goto again;#endif } else if (strcmp ("rank", cmdbuf) == 0) { fscanf (in, "%s", filename); fscanf (in, "%a[^\r\n]", &query); fprintf (stderr, "Got filename `%s'\n", filename); fprintf (stderr, "Got query `%s'\n", query); } else if (strcmp ("quit", cmdbuf) == 0) { goto done; } else if (strcmp ("help", cmdbuf) == 0) { fprintf (out, "<?xml version='1.0' encoding='US-ASCII' ?>\n" "<arrow-result><help>\n" " Commands available to you\n" " help print this message\n" " rank <filename> <query> give rank of <filename> <query>'s results\n" " query <query> search for <str>\n" "</help></arrow-result>\n.\n"); fflush (out); fscanf (in, "%a[^\r\n]", &query); free (query); goto again; } else { bow_verbosify (bow_progress, "Unrecognized command `%s'. " "Closing connection.\n", cmdbuf); goto done; } /* Create a word vector from the query string. */ query_wv = bow_wv_new_from_text_string (query); if (query_wv == NULL) { actual_num_hits = 0; goto print; } fprintf (stderr, "Query WV has length %d\n", query_wv->num_entries); free (query); if (filename[0]) num_hits_to_show = arrow_barrel->cdocs->length; else num_hits_to_show = arrow_arg_state.num_hits_to_show; bow_wv_set_weights (query_wv, arrow_barrel); /* If none of the words have a non-zero IDF, just return zero. */ if (bow_wv_weight_sum (query_wv) == 0) { actual_num_hits = 0; goto print; } bow_wv_normalize_weights (query_wv, arrow_barrel); /* Get the best matching documents. */ actual_num_hits = bow_barrel_score (arrow_barrel, query_wv, hits, num_hits_to_show, -1); print: fprintf (stderr, "Got %d hits\n", actual_num_hits); if (filename[0]) { /* Handle a "rank" command */ int rank, hi; bow_cdoc *cdoc; int count = actual_num_hits; for (rank = -1, hi = 0; hi < count; hi++) { cdoc = bow_array_entry_at_index (arrow_barrel->cdocs, hits[hi].di); if (strcmp (cdoc->filename, filename) == 0) { rank = hi; break; } } fprintf (out, "<?xml version='1.0' encoding='US-ASCII' ?>\n" "<arrow-result>\n" "<rank-result>\n" " <count>%d</count>\n", count); if (rank != -1) fprintf (out, " <rank>%d</rank>\n", rank); fprintf (out, "</rank-result>\n" "</arrow-result>\n.\n"); } else { /* Handle a "query" command */ fprintf (out, "<?xml version='1.0' encoding='US-ASCII' ?>\n" "<arrow-result>\n" "<hitlist>\n" "<count>%d</count>\n", actual_num_hits); for (hi = 0; hi < actual_num_hits; hi++) { fprintf (out, "<hit>\n" " <id>%d</id>\n" " <name>%s</name>\n" " <score>%g</score>\n" "</hit>\n", hits[hi].di, hits[hi].name, hits[hi].weight); } } fflush (out); for (hi = 0; hi < actual_num_hits; hi++) if (hits[hi].name) bow_free ((void*)hits[hi].name); /* Handle another query */ goto again; done: fclose(in); fclose(out); close(newsockfd); bow_verbosify (bow_progress, "Closed connection\n"); /* Kill the child - don't want it hanging around, sucking up memory :) */ if (arrow_arg_state.serve_with_forking) exit(0);}voidarrow_coo (){ int wi; bow_wi2dvf *wicoo; int num_hides; num_hides = bow_wi2dvf_hide_words_by_doc_count (arrow_barrel->wi2dvf, 6); bow_verbosify (bow_progress, "%d words hidden\n", num_hides); wicoo = (bow_wi2dvf*) bow_wicoo_from_barrel (arrow_barrel);#define PRINT_WORD_PROBS 1#if PRINT_WORD_PROBS { bow_dv *dv; printf ("Word probabilities:\n"); for (wi = 0; wi < wicoo->size; wi++) { dv = bow_wi2dvf_dv (wicoo, wi); if (dv) printf ("_uniform %-12.7f %s\n", dv->idf, bow_int2word (wi)); } }#endif /* PRINT_WORD_PROBS */ for (wi = 0; wi < bow_num_words (); wi++) { /* printf ("%s new word\n", bow_int2word (wi)); */ bow_wicoo_print_word_entropy (wicoo, wi); }}/* The main() function. */intmain (int argc, char *argv[]){ /* Prevents zombie children in System V environments */ signal (SIGCHLD, SIG_IGN); /* Default command-line argument values */ arrow_arg_state.num_hits_to_show = 10; arrow_arg_state.what_doing = arrow_indexing; arrow_arg_state.query_filename = NULL; arrow_arg_state.serve_with_forking = 0; /* Parse the command-line arguments. */ argp_parse (&arrow_argp, argc, argv, 0, 0, &arrow_arg_state); if (arrow_arg_state.what_doing == arrow_indexing) { if (arrow_index (argc, argv)) arrow_archive (); else bow_error ("No text documents found."); } else { arrow_unarchive ();#if 0 /* xxx */ arrow_barrel->method = &bow_method_tfidf; bow_barrel_set_weights (arrow_barrel); bow_barrel_normalize_weights (arrow_barrel);#endif if (arrow_arg_state.what_doing == arrow_querying) { arrow_query (stdin, stdout, arrow_arg_state.num_hits_to_show); } else if (arrow_arg_state.what_doing == arrow_comparing) { bow_wv *query_wv; bow_wv *compare_wv; FILE *fp; /* The user must specify the query filename on the command line. In this case it is not optional. */ assert (arrow_arg_state.query_filename); /* Make word vectors from the files. */ fp = bow_fopen (arrow_arg_state.query_filename, "r"); query_wv = bow_wv_new_from_text_fp (fp, arrow_arg_state.query_filename); fclose (fp); fp = bow_fopen (arrow_arg_state.compare_filename, "r"); compare_wv = bow_wv_new_from_text_fp (fp, arrow_arg_state.compare_filename); fclose (fp); arrow_compare (query_wv, compare_wv); } else if (arrow_arg_state.what_doing == arrow_printing_idf) { int wi; int max_wi = MIN (arrow_barrel->wi2dvf->size, bow_num_words()); bow_dv *dv; for (wi = 0; wi < max_wi; wi++) { dv = bow_wi2dvf_dv (arrow_barrel->wi2dvf, wi); if (dv) printf ("%9f %s\n", dv->idf, bow_int2word (wi)); } } else if (arrow_arg_state.what_doing == arrow_query_serving) { arrow_socket_init (arrow_arg_state.server_port_num, 0); if (arrow_arg_state.serve_with_forking) { /* int wi; bow_dv *dv; */ /* Touch all DV's so we read them into memory before forking */ /* This is *very bad* unless you are dealing with a small * model or need maximum performance! */ /* for (wi = 0; wi < arrow_barrel->wi2dvf->size; wi++) dv = bow_wi2dvf_dv (arrow_barrel->wi2dvf, wi); */ } while (1) arrow_serve2 (); } else if (arrow_arg_state.what_doing == arrow_printing_coo) { arrow_coo (); } else bow_error ("Internal error"); } exit (0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -