stringtowordvector.java

来自「Weka」· Java 代码 · 共 1,638 行 · 第 1/4 页

JAVA
1,638
字号
  /**   * Sets which attributes are to be processed.   *   * @param attributes an array containing indexes of attributes to process.   * Since the array will typically come from a program, attributes are indexed   * from 0.   * @throws IllegalArgumentException if an invalid set of ranges   * is supplied    */  public void setAttributeIndicesArray(int[] attributes) {    setAttributeIndices(Range.indicesToRangeList(attributes));  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String invertSelectionTipText() {    return "Set attribute selection mode. If false, only selected"      + " attributes in the range will be worked on; if"      + " true, only non-selected attributes will be processed.";  }  /**   * Gets whether the supplied columns are to be processed or skipped   *   * @return true if the supplied columns will be kept   */  public boolean getInvertSelection() {    return m_SelectedRange.getInvert();  }  /**   * Sets whether selected columns should be processed or skipped.   *   * @param invert the new invert setting   */  public void setInvertSelection(boolean invert) {    m_SelectedRange.setInvert(invert);  }  /**   * Get the attribute name prefix.   *   * @return The current attribute name prefix.   */  public String getAttributeNamePrefix() {    return m_Prefix;  }      /**   * Set the attribute name prefix.   *   * @param newPrefix String to use as the attribute name prefix.   */  public void setAttributeNamePrefix(String newPrefix) {    m_Prefix = newPrefix;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String attributeNamePrefixTipText() {      return "Prefix for the created attribute names. "+             "(default: \"\")";  }  /**   * Gets the number of words (per class if there is a class attribute   * assigned) to attempt to keep.   *   * @return the target number of words in the output vector (per class if   * assigned).   */  public int getWordsToKeep() {    return m_WordsToKeep;  }    /**   * Sets the number of words (per class if there is a class attribute   * assigned) to attempt to keep.   *   * @param newWordsToKeep the target number of words in the output    * vector (per class if assigned).   */  public void setWordsToKeep(int newWordsToKeep) {    m_WordsToKeep = newWordsToKeep;  }    /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String wordsToKeepTipText() {      return "The number of words (per class if there is a class attribute "+             "assigned) to attempt to keep.";  }  /** Gets whether if the word frequencies should be transformed into   *  log(1+fij) where fij is the frequency of word i in document(instance) j.   *   * @return true if word frequencies are to be transformed.   */  public boolean getTFTransform() {      return this.m_TFTransform;  }    /** Sets whether if the word frequencies should be transformed into   *  log(1+fij) where fij is the frequency of word i in document(instance) j.   *   * @param TFTransform true if word frequencies are to be transformed.   */  public void setTFTransform(boolean TFTransform) {      this.m_TFTransform = TFTransform;  }    /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String TFTransformTipText() {      return "Sets whether if the word frequencies should be transformed into:\n "+             "   log(1+fij) \n"+             "       where fij is the frequency of word i in document (instance) j.";  }    /** Sets whether if the word frequencies in a document should be transformed   * into: <br>   * fij*log(num of Docs/num of Docs with word i) <br>   *      where fij is the frequency of word i in document(instance) j.   *   * @return true if the word frequencies are to be transformed.   */  public boolean getIDFTransform() {      return this.m_IDFTransform;  }    /** Sets whether if the word frequencies in a document should be transformed   * into: <br>   * fij*log(num of Docs/num of Docs with word i) <br>   *      where fij is the frequency of word i in document(instance) j.   *   * @param IDFTransform true if the word frequecies are to be transformed   */  public void setIDFTransform(boolean IDFTransform) {      this.m_IDFTransform = IDFTransform;  }    /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String IDFTransformTipText() {      return "Sets whether if the word frequencies in a document should be "+             "transformed into: \n"+             "   fij*log(num of Docs/num of Docs with word i) \n"+             "      where fij is the frequency of word i in document (instance) j.";  }    /** Gets whether if the word frequencies for a document (instance) should   *  be normalized or not.   *   * @return true if word frequencies are to be normalized.   */  public SelectedTag getNormalizeDocLength() {    return new SelectedTag(m_filterType, TAGS_FILTER);  }    /** Sets whether if the word frequencies for a document (instance) should   *  be normalized or not.   *   * @param newType the new type.   */  public void setNormalizeDocLength(SelectedTag newType) {        if (newType.getTags() == TAGS_FILTER) {      m_filterType = newType.getSelectedTag().getID();    }  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String normalizeDocLengthTipText() {      return "Sets whether if the word frequencies for a document (instance) "+             "should be normalized or not.";  }    /** Gets whether if the tokens are to be downcased or not.   *   * @return true if the tokens are to be downcased.   */  public boolean getLowerCaseTokens() {      return this.m_lowerCaseTokens;  }    /** Sets whether if the tokens are to be downcased or not. (Doesn't affect   * non-alphabetic characters in tokens).   *   * @param downCaseTokens should be true if only lower case tokens are    * to be formed.   */  public void setLowerCaseTokens(boolean downCaseTokens) {      this.m_lowerCaseTokens = downCaseTokens;  }  /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String doNotOperateOnPerClassBasisTipText() {      return "If this is set, the maximum number of words and the "	+ "minimum term frequency is not enforced on a per-class "	+ "basis but based on the documents in all the classes "	+  "(even if a class attribute is set).";  }  /**   * Get the DoNotOperateOnPerClassBasis value.   * @return the DoNotOperateOnPerClassBasis value.   */  public boolean getDoNotOperateOnPerClassBasis() {    return m_doNotOperateOnPerClassBasis;  }  /**   * Set the DoNotOperateOnPerClassBasis value.   * @param newDoNotOperateOnPerClassBasis The new DoNotOperateOnPerClassBasis value.   */  public void setDoNotOperateOnPerClassBasis(boolean newDoNotOperateOnPerClassBasis) {    this.m_doNotOperateOnPerClassBasis = newDoNotOperateOnPerClassBasis;  }  /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String minTermFreqTipText() {      return "Sets the minimum term frequency. This is enforced "	+ "on a per-class basis.";  }  /**   * Get the MinTermFreq value.   * @return the MinTermFreq value.   */  public int getMinTermFreq() {    return m_minTermFreq;  }  /**   * Set the MinTermFreq value.   * @param newMinTermFreq The new MinTermFreq value.   */  public void setMinTermFreq(int newMinTermFreq) {    this.m_minTermFreq = newMinTermFreq;  }    /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String lowerCaseTokensTipText() {      return "If set then all the word tokens are converted to lower case "+             "before being added to the dictionary.";  }  /** Gets whether if the words on the stoplist are to be ignored (The stoplist   *  is in weka.core.StopWords).   *   * @return true if the words on the stoplist are to be ignored.   */  public boolean getUseStoplist() {      return m_useStoplist;  }      /** Sets whether if the words that are on a stoplist are to be ignored (The   * stop list is in weka.core.StopWords).   *   * @param useStoplist true if the tokens that are on a stoplist are to be    * ignored.   */  public void setUseStoplist(boolean useStoplist) {      m_useStoplist = useStoplist;  }      /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String useStoplistTipText() {      return "Ignores all the words that are on the stoplist, if set to true.";  }   /**   * the stemming algorithm to use, null means no stemming at all (i.e., the   * NullStemmer is used)   *   * @param value     the configured stemming algorithm, or null   * @see             NullStemmer   */  public void setStemmer(Stemmer value) {    if (value != null)      m_Stemmer = value;    else      m_Stemmer = new NullStemmer();  }  /**   * Returns the current stemming algorithm, null if none is used.   *   * @return          the current stemming algorithm, null if none set   */  public Stemmer getStemmer() {    return m_Stemmer;  }  /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String stemmerTipText() {    return "The stemming algorithm to use on the words.";  }  /**   * sets the file containing the stopwords, null or a directory unset the   * stopwords. If the file exists, it automatically turns on the flag to   * use the stoplist.   *   * @param value     the file containing the stopwords   */  public void setStopwords(File value) {    if (value == null)      value = new File(System.getProperty("user.dir"));    m_Stopwords = value;    if (value.exists() && value.isFile())      setUseStoplist(true);  }  /**   * returns the file used for obtaining the stopwords, if the file represents   * a directory then the default ones are used.   *   * @return          the file containing the stopwords   */  public File getStopwords() {    return m_Stopwords;  }  /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String stopwordsTipText() {    return "The file containing the stopwords (if this is a directory then the default ones are used).";  }    /**   * the tokenizer algorithm to use   *   * @param value     the configured tokenizing algorithm   */  public void setTokenizer(Tokenizer value) {    m_Tokenizer = value;  }  /**   * Returns the current tokenizer algorithm.   *   * @return          the current tokenizer algorithm   */  public Tokenizer getTokenizer() {    return m_Tokenizer;  }  /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String tokenizerTipText() {    return "The tokenizing algorithm to use on the strings.";

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?