📄 tui.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.share.mccallum.ner;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.fst.*;import edu.umass.cs.mallet.base.minimize.*;import edu.umass.cs.mallet.base.minimize.tests.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.pipe.iterator.*;import edu.umass.cs.mallet.base.pipe.tsf.*;import edu.umass.cs.mallet.base.util.*;import junit.framework.*;import java.util.Iterator;import java.util.Random;import java.util.regex.*;import java.io.*;public class TUI{ static CommandOption.Double gaussianVarianceOption = new CommandOption.Double (TUI.class, "gaussian-variance", "DECIMAL", true, 10.0, "The gaussian prior variance used for training.", null); static CommandOption.Double hyperbolicSlopeOption = new CommandOption.Double (TUI.class, "hyperbolic-slope", "DECIMAL", true, 0.2, "The hyperbolic prior slope used for training.", null); static CommandOption.Double hyperbolicSharpnessOption = new CommandOption.Double (TUI.class, "hyperbolic-sharpness", "DECIMAL", true, 10.0, "The hyperbolic prior sharpness used for training.", null); static CommandOption.File crfInputFileOption = new CommandOption.File (TUI.class, "crf-input-file", "FILENAME", true, null, "The name of the file to write the CRF after training.", null); static CommandOption.Integer randomSeedOption = new CommandOption.Integer (TUI.class, "random-seed", "INTEGER", true, 0, "The random seed for randomly selecting a proportion of the instance list for training", null); static CommandOption.Integer labelGramOption = new CommandOption.Integer (TUI.class, "label-gram", "INTEGER", true, 1, "Markov order of labels: 1, 2, 3", null); static CommandOption.Integer wordWindowFeatureOption = new CommandOption.Integer (TUI.class, "word-window-size", "INTEGER", true, 0, "Size of window of words as features: 0=none, 10, 20...", null); static CommandOption.Boolean useTestbOption = new CommandOption.Boolean (TUI.class, "use-testb", "true|false", true, false, "Use testb, final test set", null); static CommandOption.Boolean useHyperbolicPriorOption = new CommandOption.Boolean (TUI.class, "use-hyperbolic-prior", "true|false", true, false, "Use hyperbolic prior", null); static CommandOption.Boolean useFeatureInductionOption = new CommandOption.Boolean (TUI.class, "use-feature-induction", "true|false", true, false, "Not use or use feature induction", null); static CommandOption.Boolean clusterFeatureInductionOption = new CommandOption.Boolean (TUI.class, "cluster-feature-induction", "true|false", true, false, "Cluster in feature induction", null); static CommandOption.Boolean useFirstMentionFeatureOption = new CommandOption.Boolean (TUI.class, "use-firstmention-feature", "true|false", true, false, "Don't use first-mention feature", null); static CommandOption.Boolean useDocHeaderFeatureOption = new CommandOption.Boolean (TUI.class, "use-docheader-feature", "true|false", true, false, "", null); static CommandOption.Boolean includeConllLexiconsOption = new CommandOption.Boolean (TUI.class, "include-conll-lexicons", "true|false", true, false, "", null); static CommandOption.Boolean charNGramsOption = new CommandOption.Boolean (TUI.class, "char-ngrams", "true|false", true, false, "", null); static CommandOption.String offsetsOption = new CommandOption.String (TUI.class, "offsets", "e.g. [[0,0],[1]]", true, "[[-2],[-1],[1],[2]]", "Offset conjunctions", null); static CommandOption.String capOffsetsOption = new CommandOption.String (TUI.class, "cap-offsets", "e.g. [[0,0],[0,1]]", true, "", "Offset conjunctions applied to features that are [A-Z]*", null); static CommandOption.String viterbiFilePrefixOption = new CommandOption.String (TUI.class, "viterbi-file", "FILE", true, "TUI", "Filename in which to store most recent Viterbi output", null); static final CommandOption.List commandOptions = new CommandOption.List ( "Training, testing and running a Chinese word segmenter.", new CommandOption[] { gaussianVarianceOption, hyperbolicSlopeOption, hyperbolicSharpnessOption, randomSeedOption, labelGramOption, wordWindowFeatureOption, useHyperbolicPriorOption, useFeatureInductionOption, clusterFeatureInductionOption, useFirstMentionFeatureOption, useDocHeaderFeatureOption, includeConllLexiconsOption, offsetsOption, capOffsetsOption, viterbiFilePrefixOption, useTestbOption, }); int numEvaluations = 0; static int iterationsBetweenEvals = 16; static boolean doingFeatureInduction = true; static boolean doingClusteredFeatureInduction = false; private static String CAPS = "[A-Z辽陀诶忍屹茄宪]"; private static String LOW = "[a-z噼祢
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -