stringtowordvector.java
来自「Weka」· Java 代码 · 共 1,638 行 · 第 1/4 页
JAVA
1,638 行
/**
 * Sets which attributes are to be processed, given as an array of indices.
 * The array is converted with {@code Range.indicesToRangeList} and the
 * result is handed to {@code setAttributeIndices}.
 *
 * @param attributes an array containing indexes of attributes to process.
 * Since the array will typically come from a program, attributes are
 * indexed from 0.
 * @throws IllegalArgumentException if an invalid set of ranges is supplied
 */
public void setAttributeIndicesArray(int[] attributes) {
  this.setAttributeIndices(Range.indicesToRangeList(attributes));
}

/**
 * Returns the tip text for the invertSelection property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String invertSelectionTipText() {
  return "Set attribute selection mode. If false, only selected"
    + " attributes in the range will be worked on; if"
    + " true, only non-selected attributes will be processed.";
}

/**
 * Gets the invert flag of the selected attribute range.
 *
 * @return the value of {@code m_SelectedRange.getInvert()}; according to
 * the tip text of this property, true means only the non-selected
 * attributes are processed
 */
public boolean getInvertSelection() {
  return this.m_SelectedRange.getInvert();
}

/**
 * Sets the invert flag of the selected attribute range, i.e. whether the
 * selected columns should be processed or skipped.
 *
 * @param invert the new invert setting
 */
public void setInvertSelection(boolean invert) {
  this.m_SelectedRange.setInvert(invert);
}

/**
 * Gets the attribute name prefix.
 *
 * @return the current attribute name prefix
 */
public String getAttributeNamePrefix() {
  return this.m_Prefix;
}

/**
 * Sets the attribute name prefix.
 *
 * @param newPrefix the string to use as the attribute name prefix
 */
public void setAttributeNamePrefix(String newPrefix) {
  this.m_Prefix = newPrefix;
}

/**
 * Returns the tip text for the attributeNamePrefix property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String attributeNamePrefixTipText() {
  return "Prefix for the created attribute names. "
    + "(default: \"\")";
}

/**
 * Gets the number of words (per class if there is a class attribute
 * assigned) to attempt to keep.
 *
 * @return the target number of words in the output vector (per class if
 * assigned)
 */
public int getWordsToKeep() {
  return this.m_WordsToKeep;
}

/**
 * Sets the number of words (per class if there is a class attribute
 * assigned) to attempt to keep.
 *
 * @param newWordsToKeep the target number of words in the output
 * vector (per class if assigned)
 */
public void setWordsToKeep(int newWordsToKeep) {
  this.m_WordsToKeep = newWordsToKeep;
}

/**
 * Returns the tip text for the wordsToKeep property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String wordsToKeepTipText() {
  return "The number of words (per class if there is a class attribute "
    + "assigned) to attempt to keep.";
}

/**
 * Gets whether the word frequencies should be transformed into
 * log(1+fij), where fij is the frequency of word i in document
 * (instance) j.
 *
 * @return true if word frequencies are to be transformed
 */
public boolean getTFTransform() {
  return m_TFTransform;
}

/**
 * Sets whether the word frequencies should be transformed into
 * log(1+fij), where fij is the frequency of word i in document
 * (instance) j.
 *
 * @param TFTransform true if word frequencies are to be transformed
 */
public void setTFTransform(boolean TFTransform) {
  m_TFTransform = TFTransform;
}

/**
 * Returns the tip text for the TFTransform property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String TFTransformTipText() {
  return "Sets whether if the word frequencies should be transformed into:\n "
    + " log(1+fij) \n"
    + " where fij is the frequency of word i in document (instance) j.";
}

/**
 * Gets whether the word frequencies in a document should be transformed
 * into fij*log(num of Docs/num of Docs with word i), where fij is the
 * frequency of word i in document (instance) j.
 *
 * @return true if the word frequencies are to be transformed
 */
public boolean getIDFTransform() {
  return m_IDFTransform;
}

/**
 * Sets whether the word frequencies in a document should be transformed
 * into fij*log(num of Docs/num of Docs with word i), where fij is the
 * frequency of word i in document (instance) j.
 *
 * @param IDFTransform true if the word frequencies are to be transformed
 */
public void setIDFTransform(boolean IDFTransform) {
  m_IDFTransform = IDFTransform;
}

/**
 * Returns the tip text for the IDFTransform property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String IDFTransformTipText() {
  return "Sets whether if the word frequencies in a document should be "
    + "transformed into: \n"
    + " fij*log(num of Docs/num of Docs with word i) \n"
    + " where fij is the frequency of word i in document (instance) j.";
}

/**
 * Gets the document-length normalization setting, wrapped in a new
 * SelectedTag built from the stored filter type and TAGS_FILTER.
 *
 * @return the currently selected normalization type
 */
public SelectedTag getNormalizeDocLength() {
  final SelectedTag selected = new SelectedTag(this.m_filterType, TAGS_FILTER);
  return selected;
}

/**
 * Sets the document-length normalization setting. The request is ignored
 * unless the tags of the supplied SelectedTag are the TAGS_FILTER array
 * itself (reference comparison).
 *
 * @param newType the new type
 */
public void setNormalizeDocLength(SelectedTag newType) {
  if (newType.getTags() != TAGS_FILTER) {
    return;
  }
  this.m_filterType = newType.getSelectedTag().getID();
}

/**
 * Returns the tip text for the normalizeDocLength property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String normalizeDocLengthTipText() {
  return "Sets whether if the word frequencies for a document (instance) "
    + "should be normalized or not.";
}

/**
 * Gets whether the tokens are to be downcased.
 *
 * @return true if the tokens are to be downcased
 */
public boolean getLowerCaseTokens() {
  return m_lowerCaseTokens;
}

/**
 * Sets whether the tokens are to be downcased. (Doesn't affect
 * non-alphabetic characters in tokens.)
 *
 * @param downCaseTokens should be true if only lower case tokens are
 * to be formed
 */
public void setLowerCaseTokens(boolean downCaseTokens) {
  m_lowerCaseTokens = downCaseTokens;
}

/**
 * Returns the tip text for the doNotOperateOnPerClassBasis property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String doNotOperateOnPerClassBasisTipText() {
  return "If this is set, the maximum number of words and the "
    + "minimum term frequency is not enforced on a per-class "
    + "basis but based on the documents in all the classes "
    + "(even if a class attribute is set).";
}

/**
 * Gets the DoNotOperateOnPerClassBasis value.
 *
 * @return the DoNotOperateOnPerClassBasis value
 */
public boolean getDoNotOperateOnPerClassBasis() {
  return this.m_doNotOperateOnPerClassBasis;
}

/**
 * Sets the DoNotOperateOnPerClassBasis value.
 *
 * @param newDoNotOperateOnPerClassBasis the new
 * DoNotOperateOnPerClassBasis value
 */
public void setDoNotOperateOnPerClassBasis(boolean newDoNotOperateOnPerClassBasis) {
  m_doNotOperateOnPerClassBasis = newDoNotOperateOnPerClassBasis;
}

/**
 * Returns the tip text for the minTermFreq property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String minTermFreqTipText() {
  return "Sets the minimum term frequency. This is enforced "
    + "on a per-class basis.";
}

/**
 * Gets the MinTermFreq value.
 *
 * @return the MinTermFreq value
 */
public int getMinTermFreq() {
  return this.m_minTermFreq;
}

/**
 * Sets the MinTermFreq value.
 *
 * @param newMinTermFreq the new MinTermFreq value
 */
public void setMinTermFreq(int newMinTermFreq) {
  m_minTermFreq = newMinTermFreq;
}

/**
 * Returns the tip text for the lowerCaseTokens property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String lowerCaseTokensTipText() {
  return "If set then all the word tokens are converted to lower case "
    + "before being added to the dictionary.";
}

/**
 * Gets whether the words on the stoplist are to be ignored (the stoplist
 * is in weka.core.StopWords).
 *
 * @return true if the words on the stoplist are to be ignored
 */
public boolean getUseStoplist() {
  return this.m_useStoplist;
}

/**
 * Sets whether the words that are on a stoplist are to be ignored (the
 * stoplist is in weka.core.StopWords).
 *
 * @param useStoplist true if the tokens that are on a stoplist are to be
 * ignored
 */
public void setUseStoplist(boolean useStoplist) {
  this.m_useStoplist = useStoplist;
}

/**
 * Returns the tip text for the useStoplist property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String useStoplistTipText() {
  return "Ignores all the words that are on the stoplist, if set to true.";
}

/**
 * Sets the stemming algorithm to use. Passing null means no stemming at
 * all: a new NullStemmer is stored instead of the null reference.
 *
 * @param value the configured stemming algorithm, or null
 * @see NullStemmer
 */
public void setStemmer(Stemmer value) {
  if (value == null) {
    this.m_Stemmer = new NullStemmer();
  } else {
    this.m_Stemmer = value;
  }
}

/**
 * Returns the current stemming algorithm. Note that setStemmer never
 * stores null; it substitutes a NullStemmer when given null.
 *
 * @return the current stemming algorithm
 */
public Stemmer getStemmer() {
  return this.m_Stemmer;
}

/**
 * Returns the tip text for the stemmer property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String stemmerTipText() {
  return "The stemming algorithm to use on the words.";
}

/**
 * Sets the file containing the stopwords. A null argument is replaced by
 * the directory named by the "user.dir" system property; null or a
 * directory unsets the stopwords. If the file exists and is a regular
 * file, the flag to use the stoplist is automatically turned on.
 *
 * @param value the file containing the stopwords
 */
public void setStopwords(File value) {
  final File stopwordsFile =
    (value != null) ? value : new File(System.getProperty("user.dir"));
  this.m_Stopwords = stopwordsFile;
  // Only an existing regular file switches the stoplist on.
  if (!stopwordsFile.exists() || !stopwordsFile.isFile()) {
    return;
  }
  setUseStoplist(true);
}

/**
 * Returns the file used for obtaining the stopwords; if the file
 * represents a directory then the default ones are used.
 *
 * @return the file containing the stopwords
 */
public File getStopwords() {
  return this.m_Stopwords;
}

/**
 * Returns the tip text for the stopwords property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String stopwordsTipText() {
  return "The file containing the stopwords (if this is a directory then the default ones are used).";
}

/**
 * Sets the tokenizer algorithm to use. The value is stored as given.
 *
 * @param value the configured tokenizing algorithm
 */
public void setTokenizer(Tokenizer value) {
  this.m_Tokenizer = value;
}

/**
 * Returns the current tokenizer algorithm.
 *
 * @return the current tokenizer algorithm
 */
public Tokenizer getTokenizer() {
  return this.m_Tokenizer;
}

/**
 * Returns the tip text for the tokenizer property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String tokenizerTipText() {
  return "The tokenizing algorithm to use on the strings.";
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?