📄 opts.c

📁 卡内基梅隆大学McCallum开发的文本分类系统
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
	memcpy (lex, bow_html_lexer, sizeof (bow_lexer));	lex->next = bow_default_lexer;	bow_default_lexer = lex;	break;      }    case LEX_WHITE_KEY:      /* Use the whitespace lexer parameters */      memcpy (bow_default_lexer_parameters, bow_white_lexer_parameters,	      sizeof (bow_lexer_parameters));      break;    case LEX_ALPHANUM_KEY:      /* Use the alphanum lexer */      memcpy (bow_default_lexer_parameters, bow_alphanum_lexer_parameters,	      sizeof (bow_lexer_parameters));      break;    case LEX_SUFFIXING_KEY:      /* Use the suffixing lexer for prepending tags like `Date:' etc */      {	/* By default it uses the html_lexer as its underlying lexer */	bow_lexer *ulex = bow_malloc (sizeof (bow_lexer));	bow_lexer *lex = bow_malloc (sizeof (bow_lexer));	memcpy (ulex, bow_html_lexer, sizeof (bow_lexer));	ulex->next = bow_default_lexer;	memcpy (lex, bow_suffixing_lexer, sizeof (bow_lexer));	lex->next = ulex;	bow_default_lexer = lex;	break;      }    case LEX_INFIX_KEY:      bow_lexer_infix_separator = arg;      bow_lexer_infix_length = strlen (arg);      break;    case FLEX_MAIL_KEY:      bow_flex_option = USE_MAIL_FLEXER;      break;    case FLEX_TAGGED_KEY:      bow_flex_option = USE_TAGGED_FLEXER;      break;    case SHORTEST_WORD_KEY:      /* Set the length of the shortest token that will not be tossed */      {	int s = atoi (arg);	assert (s > 0);	bow_lexer_toss_words_shorter_than = s;	break;      }    case 's':      /* Do not toss lexed words that appear in the stoplist */      bow_lexer_stoplist_func = NULL;      break;    case 's'+KEY_OPPOSITE:      /* Toss lexed words that appear in the stoplist */      bow_lexer_stoplist_func = bow_stoplist_present;      break;    case 'S':      /* Modify lexed words with the `Porter' stemming function */      bow_lexer_stem_func = bow_stem_porter;      break;    case 'S'+KEY_OPPOSITE:      /* Do not modify lexed words with a stemmiog function. 
(default) */      /* Modify lexed words with the `Porter' stemming function */      bow_lexer_stem_func = NULL;      break;    case APPEND_STOPLIST_FILE_KEY:      bow_stoplist_add_from_file (arg);      break;    case REPLACE_STOPLIST_FILE_KEY:      bow_stoplist_replace_with_file (arg);      break;    case ANNOTATION_KEY:      bow_annotation_filename = arg;      break;    case 'U':      /* Use a special lexer for UseNet articles, ignore some headers and	 uuencoded blocks. */      bow_error ("The -U option is broken.");      break;    case EXCLUDE_FILENAME_KEY:      bow_exclude_filename = arg;      break;    case LEX_PIPE_COMMAND_KEY:      bow_lex_pipe_command = arg;      if (bow_hdb)	bow_error ("--hdb and --lex-pipe-command options cannot be used in"		   " conjunction\n");      break;    case ISTEXT_AVOID_UUENCODE_KEY:      bow_istext_avoid_uuencode = 1;      break;    case XXX_WORDS_ONLY_KEY:      bow_xxx_words_only = 1;      break;    case MAX_NUM_WORDS_PER_DOCUMENT_KEY:      bow_lexer_max_num_words_per_document = atoi (arg);      break;    case USE_UNKNOWN_WORD_KEY:      bow_word2int_use_unknown_word = 1;      break;      /* Feature selection options. 
*/    case 'T':      /* Remove all but the top N words by selecting words with highest 	 information gain */      bow_prune_vocab_by_infogain_n = atoi (arg);      break;    case 'O':      /* Remove words that occur less than N times */      bow_prune_vocab_by_occur_count_n = atoi (arg);      break;    case 'D':      /* Remove words that occur in N or fewer documents */      bow_prune_words_by_doc_count_n = atoi (arg);      break;    case 'm':      bow_argp_method = bow_method_at_name (arg);      break;    case SMOOTHING_METHOD_KEY:      if (!strcmp (arg, "goodturing"))	bow_smoothing_method = bow_smoothing_goodturing;      else if (!strcmp (arg, "laplace"))	bow_smoothing_method = bow_smoothing_laplace;      else if (!strcmp (arg, "mestimate"))	bow_smoothing_method = bow_smoothing_mestimate;      else if (!strcmp (arg, "wittenbell"))	bow_smoothing_method = bow_smoothing_wittenbell;      else if (!strcmp (arg, "dirichlet"))	bow_smoothing_method = bow_smoothing_dirichlet;      else	bow_error ("--smoothing-method: No such smoothing method `%s'", arg);      break;    case SMOOTHING_GOODTURING_K:      bow_smoothing_goodturing_k = atoi (arg);      break;    case SMOOTHING_DIRICHLET_FILENAME:      bow_smoothing_dirichlet_filename = arg;      break;    case SMOOTHING_DIRICHLET_WEIGHT:      bow_smoothing_dirichlet_weight = atof (arg);      break;    case PRINT_WORD_SCORES_KEY:      bow_print_word_scores = 1;      break;    case UNIFORM_CLASS_PRIORS_KEY:      bow_uniform_class_priors = 1;      break;    case BINARY_WORD_COUNTS_KEY:      /* Use binary absence/presence, instead of integer occurrence         counts for words. 
*/      bow_binary_word_counts = 1;      break;    case EVENT_MODEL_KEY:      if (!strcmp (arg, "document"))	bow_event_model = bow_event_document;      else if (!strcmp (arg, "word"))	bow_event_model = bow_event_word;      else if (!strcmp (arg, "document-then-word")	       || !strcmp (arg, "dw"))	bow_event_model = bow_event_document_then_word;      else	bow_error ("--event-model: No such event model `%s'", arg);      break;    case EVENT_DOC_THEN_WORD_DOC_LENGTH_KEY:      bow_event_document_then_word_document_length = atoi (arg);      assert (bow_event_document_then_word_document_length > 0);      break;    case INFOGAIN_EVENT_MODEL_KEY:      if (!strcmp (arg, "document"))	bow_infogain_event_model = bow_event_document;      else if (!strcmp (arg, "word"))	bow_infogain_event_model = bow_event_word;      else if (!strcmp (arg, "document-then-word"))	bow_infogain_event_model = bow_event_document_then_word;      else	bow_error ("--infogain_event-model: No such event model `%s'", arg);      break;    case ARGP_KEY_INIT:      /* Things to do before any arguments are processed. */      /* If the file ./.bow-stopwords exists, load the extra words into	 the stoplist. */      bow_stoplist_add_from_file ("./.bow-stopwords");      /* If the file ~/.bow-stopwords exists, load the extra words into	 the stoplist. */      {	const char sw_fn[] = "/.bow-stopwords";	const char *home = getenv ("HOME");	if (home != NULL) {	    char filename[strlen (home) + strlen (sw_fn) + 1];	    strcpy (filename, home);	    strcat (filename, sw_fn);	    bow_stoplist_add_from_file (filename);    	}      }       /* Build the default data directory name, in case it wasn't	 specified on the command line. 
*/      assert (program_invocation_short_name);      if (!bow_data_dirname)	{	  char *homedir = getenv ("HOME");	  if (homedir != NULL) {	      char *dirname = bow_malloc (strlen (homedir) 					  + strlen ("/.")					  + strlen (program_invocation_short_name)					  + 1);	      strcpy (dirname, homedir);	      strcat (dirname, "/.");	      strcat (dirname, program_invocation_short_name);	      bow_data_dirname = dirname;	  }	}    case ARGP_KEY_ARG:      break;    case ARGP_KEY_END:      /* Create the data directory, if it doesn't exist already. */      {	struct stat st;	int e;	e = stat (bow_data_dirname, &st);	if (e == 0)	  {	    /* Assume this means the file exists. */	    if (!S_ISDIR (st.st_mode))	      bow_error ("`%s' already exists, but is not a directory",			 bow_data_dirname);	  }#if !defined(DART) && !defined(FDART)	else	  {	    if (mkdir (bow_data_dirname, 0777) == 0)	      bow_verbosify (bow_quiet, "Created directory `%s'.\n", 			     bow_data_dirname);	    else if (errno != EEXIST)	      bow_error ("Couldn't create default data directory `%s'",			 bow_data_dirname);	  }#endif      }    default:      return ARGP_ERR_UNKNOWN;    }  return 0;}static char *_help_filter (int key, const char *text, void *input){  char *ret;  /* Add the names of the available methods to the help text. 
*/  if (key == 'm' && bow_methods)    {      static const int len = 1024;      char methodnames[len];      int i;      bow_method *m;      methodnames[0] = '\0';      for (i = bow_methods->array->length-1; i >= 0; i--)	{	  m = bow_sarray_entry_at_index (bow_methods, i);	  strcat (methodnames, m->name);	  strcat (methodnames, ", ");	}      strcat (methodnames, "default=naivebayes.");      assert (strlen (methodnames) < len);      ret = malloc (strlen (text) + len);      strcpy (ret, text);      strcat (ret, methodnames);      return ret;    }  return (char*)text;}/* This may be used with argp_parse to parse standard libbow startup   options, possible chained onto the end of a user argp structure.  */const struct argp bow_argp ={  bow_options,			/* data structure describing cmdline options */  parse_bow_opt,		/* the function to handle the options */  0,				/* non-option argument documention */  0,				/* extra text printed before and after help */  0,				/* argp children */  _help_filter};#define MAX_NUM_CHILDREN 100struct argp_child bow_argp_children[MAX_NUM_CHILDREN] ={  {    &bow_argp,			/* the argp structure */    0,				/* flags for child */    "Libbow options",		/* optional header */    999				/* group (general lib flags at end of help)*/  },  {0}};/* The number of children already initialized in the const assignment above. */static int bow_argp_children_length = 1;/* Add the options in CHILD to the list of command-line options. 
*/
/* CHILD is copied by value into the first unused slot of
   bow_argp_children, and the slot right after it is cleared so the
   array always ends in an all-zero entry.  The assert fires when adding
   the child would leave no room for that zeroed entry. */
void
bow_argp_add_child (struct argp_child *child)
{
  struct argp_child *slot;

  assert (bow_argp_children_length+1 < MAX_NUM_CHILDREN);

  slot = &bow_argp_children[bow_argp_children_length];
  *slot = *child;
  bow_argp_children_length++;

  /* Re-establish the zeroed entry after the newly added child. */
  memset (slot + 1, 0, sizeof *slot);
}

/* Write version information to STREAM: first the program's own version
   string, when argp_program_version is set, and then the libbow
   version.  STATE is not used. */
static void
_print_version (FILE *stream, struct argp_state *state)
{
  if (argp_program_version != NULL)
    {
      /* The program defined its own version string, so it goes first. */
      fprintf (stream, "%s\n", argp_program_version);
    }

  /* libbow changes independently of the program, so report it as well. */
  fprintf (stream, "libbow %d.%d\n",
           BOW_MAJOR_VERSION, BOW_MINOR_VERSION);
}

/* Install _print_version as argp's version hook. */
void (*argp_program_version_hook) (FILE *stream, struct argp_state *state)
     = _print_version;
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -