📄 opts.c
字号:
memcpy (lex, bow_html_lexer, sizeof (bow_lexer)); lex->next = bow_default_lexer; bow_default_lexer = lex; break; } case LEX_WHITE_KEY: /* Use the whitespace lexer parameters */ memcpy (bow_default_lexer_parameters, bow_white_lexer_parameters, sizeof (bow_lexer_parameters)); break; case LEX_ALPHANUM_KEY: /* Use the alphanum lexer */ memcpy (bow_default_lexer_parameters, bow_alphanum_lexer_parameters, sizeof (bow_lexer_parameters)); break; case LEX_SUFFIXING_KEY: /* Use the suffixing lexer for prepending tags like `Date:' etc */ { /* By default it uses the html_lexer as its underlying lexer */ bow_lexer *ulex = bow_malloc (sizeof (bow_lexer)); bow_lexer *lex = bow_malloc (sizeof (bow_lexer)); memcpy (ulex, bow_html_lexer, sizeof (bow_lexer)); ulex->next = bow_default_lexer; memcpy (lex, bow_suffixing_lexer, sizeof (bow_lexer)); lex->next = ulex; bow_default_lexer = lex; break; } case LEX_INFIX_KEY: bow_lexer_infix_separator = arg; bow_lexer_infix_length = strlen (arg); break; case FLEX_MAIL_KEY: bow_flex_option = USE_MAIL_FLEXER; break; case FLEX_TAGGED_KEY: bow_flex_option = USE_TAGGED_FLEXER; break; case SHORTEST_WORD_KEY: /* Set the length of the shortest token that will not be tossed */ { int s = atoi (arg); assert (s > 0); bow_lexer_toss_words_shorter_than = s; break; } case 's': /* Do not toss lexed words that appear in the stoplist */ bow_lexer_stoplist_func = NULL; break; case 's'+KEY_OPPOSITE: /* Toss lexed words that appear in the stoplist */ bow_lexer_stoplist_func = bow_stoplist_present; break; case 'S': /* Modify lexed words with the `Porter' stemming function */ bow_lexer_stem_func = bow_stem_porter; break; case 'S'+KEY_OPPOSITE: /* Do not modify lexed words with a stemmiog function. (default) */ /* Modify lexed words with the `Porter' stemming function */ bow_lexer_stem_func = NULL; break; case APPEND_STOPLIST_FILE_KEY: bow_stoplist_add_from_file (arg); break; case REPLACE_STOPLIST_FILE_KEY: bow_stoplist_replace_with_file (arg); break; case ANNOTATION_KEY: bow_annotation_filename = arg; break; case 'U': /* Use a special lexer for UseNet articles, ignore some headers and uuencoded blocks. */ bow_error ("The -U option is broken."); break; case EXCLUDE_FILENAME_KEY: bow_exclude_filename = arg; break; case LEX_PIPE_COMMAND_KEY: bow_lex_pipe_command = arg; if (bow_hdb) bow_error ("--hdb and --lex-pipe-command options cannot be used in" " conjunction\n"); break; case ISTEXT_AVOID_UUENCODE_KEY: bow_istext_avoid_uuencode = 1; break; case XXX_WORDS_ONLY_KEY: bow_xxx_words_only = 1; break; case MAX_NUM_WORDS_PER_DOCUMENT_KEY: bow_lexer_max_num_words_per_document = atoi (arg); break; case USE_UNKNOWN_WORD_KEY: bow_word2int_use_unknown_word = 1; break; /* Feature selection options. */ case 'T': /* Remove all but the top N words by selecting words with highest information gain */ bow_prune_vocab_by_infogain_n = atoi (arg); break; case 'O': /* Remove words that occur less than N times */ bow_prune_vocab_by_occur_count_n = atoi (arg); break; case 'D': /* Remove words that occur in N or fewer documents */ bow_prune_words_by_doc_count_n = atoi (arg); break; case 'm': bow_argp_method = bow_method_at_name (arg); break; case SMOOTHING_METHOD_KEY: if (!strcmp (arg, "goodturing")) bow_smoothing_method = bow_smoothing_goodturing; else if (!strcmp (arg, "laplace")) bow_smoothing_method = bow_smoothing_laplace; else if (!strcmp (arg, "mestimate")) bow_smoothing_method = bow_smoothing_mestimate; else if (!strcmp (arg, "wittenbell")) bow_smoothing_method = bow_smoothing_wittenbell; else if (!strcmp (arg, "dirichlet")) bow_smoothing_method = bow_smoothing_dirichlet; else bow_error ("--smoothing-method: No such smoothing method `%s'", arg); break; case SMOOTHING_GOODTURING_K: bow_smoothing_goodturing_k = atoi (arg); break; case SMOOTHING_DIRICHLET_FILENAME: bow_smoothing_dirichlet_filename = arg; break; case SMOOTHING_DIRICHLET_WEIGHT: bow_smoothing_dirichlet_weight = atof (arg); break; case PRINT_WORD_SCORES_KEY: bow_print_word_scores = 1; break; case UNIFORM_CLASS_PRIORS_KEY: bow_uniform_class_priors = 1; break; case BINARY_WORD_COUNTS_KEY: /* Use binary absence/presence, instead of integer occurrence counts for words. */ bow_binary_word_counts = 1; break; case EVENT_MODEL_KEY: if (!strcmp (arg, "document")) bow_event_model = bow_event_document; else if (!strcmp (arg, "word")) bow_event_model = bow_event_word; else if (!strcmp (arg, "document-then-word") || !strcmp (arg, "dw")) bow_event_model = bow_event_document_then_word; else bow_error ("--event-model: No such event model `%s'", arg); break; case EVENT_DOC_THEN_WORD_DOC_LENGTH_KEY: bow_event_document_then_word_document_length = atoi (arg); assert (bow_event_document_then_word_document_length > 0); break; case INFOGAIN_EVENT_MODEL_KEY: if (!strcmp (arg, "document")) bow_infogain_event_model = bow_event_document; else if (!strcmp (arg, "word")) bow_infogain_event_model = bow_event_word; else if (!strcmp (arg, "document-then-word")) bow_infogain_event_model = bow_event_document_then_word; else bow_error ("--infogain_event-model: No such event model `%s'", arg); break; case ARGP_KEY_INIT: /* Things to do before any arguments are processed. */ /* If the file ./.bow-stopwords exists, load the extra words into the stoplist. */ bow_stoplist_add_from_file ("./.bow-stopwords"); /* If the file ~/.bow-stopwords exists, load the extra words into the stoplist. */ { const char sw_fn[] = "/.bow-stopwords"; const char *home = getenv ("HOME"); if (home != NULL) { char filename[strlen (home) + strlen (sw_fn) + 1]; strcpy (filename, home); strcat (filename, sw_fn); bow_stoplist_add_from_file (filename); } } /* Build the default data directory name, in case it wasn't specified on the command line. */ assert (program_invocation_short_name); if (!bow_data_dirname) { char *homedir = getenv ("HOME"); if (homedir != NULL) { char *dirname = bow_malloc (strlen (homedir) + strlen ("/.") + strlen (program_invocation_short_name) + 1); strcpy (dirname, homedir); strcat (dirname, "/."); strcat (dirname, program_invocation_short_name); bow_data_dirname = dirname; } } case ARGP_KEY_ARG: break; case ARGP_KEY_END: /* Create the data directory, if it doesn't exist already. */ { struct stat st; int e; e = stat (bow_data_dirname, &st); if (e == 0) { /* Assume this means the file exists. */ if (!S_ISDIR (st.st_mode)) bow_error ("`%s' already exists, but is not a directory", bow_data_dirname); }#if !defined(DART) && !defined(FDART) else { if (mkdir (bow_data_dirname, 0777) == 0) bow_verbosify (bow_quiet, "Created directory `%s'.\n", bow_data_dirname); else if (errno != EEXIST) bow_error ("Couldn't create default data directory `%s'", bow_data_dirname); }#endif } default: return ARGP_ERR_UNKNOWN; } return 0;}static char *_help_filter (int key, const char *text, void *input){ char *ret; /* Add the names of the available methods to the help text. */ if (key == 'm' && bow_methods) { static const int len = 1024; char methodnames[len]; int i; bow_method *m; methodnames[0] = '\0'; for (i = bow_methods->array->length-1; i >= 0; i--) { m = bow_sarray_entry_at_index (bow_methods, i); strcat (methodnames, m->name); strcat (methodnames, ", "); } strcat (methodnames, "default=naivebayes."); assert (strlen (methodnames) < len); ret = malloc (strlen (text) + len); strcpy (ret, text); strcat (ret, methodnames); return ret; } return (char*)text;}/* This may be used with argp_parse to parse standard libbow startup options, possible chained onto the end of a user argp structure. */const struct argp bow_argp ={ bow_options, /* data structure describing cmdline options */ parse_bow_opt, /* the function to handle the options */ 0, /* non-option argument documention */ 0, /* extra text printed before and after help */ 0, /* argp children */ _help_filter};#define MAX_NUM_CHILDREN 100struct argp_child bow_argp_children[MAX_NUM_CHILDREN] ={ { &bow_argp, /* the argp structure */ 0, /* flags for child */ "Libbow options", /* optional header */ 999 /* group (general lib flags at end of help)*/ }, {0}};/* The number of children already initialized in the const assignment above. */static int bow_argp_children_length = 1;/* Add the options in CHILD to the list of command-line options. */voidbow_argp_add_child (struct argp_child *child){ assert (bow_argp_children_length+1 < MAX_NUM_CHILDREN); memcpy (bow_argp_children + bow_argp_children_length, child, sizeof (struct argp_child)); bow_argp_children_length++;#if 1 memset (bow_argp_children + bow_argp_children_length, 0, sizeof (struct argp_child));#endif}static void_print_version (FILE *stream, struct argp_state *state){ if (argp_program_version) /* If this is non-zero, then the program's probably defined it, so let that take precedence over the default. */ fprintf (stream, "%s\n", argp_program_version); /* And because libbow is a changing, integral part, put our information out too. */ fprintf (stream, "libbow %d.%d\n", BOW_MAJOR_VERSION, BOW_MINOR_VERSION);}void (*argp_program_version_hook) (FILE *stream, struct argp_state *state) = _print_version;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -