ngramtokenizer.html
来自「数据挖掘的最常用工具。由于开源」· HTML 代码 · 共 649 行 · 第 1/2 页
HTML
649 行
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><!--NewPage--><HTML><HEAD><!-- Generated by javadoc (build 1.5.0_13) on Tue Jul 15 15:48:48 NZST 2008 --><TITLE>NGramTokenizer</TITLE><META NAME="keywords" CONTENT="weka.core.tokenizers.NGramTokenizer class"><LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style"><SCRIPT type="text/javascript">function windowTitle(){ parent.document.title="NGramTokenizer";}</SCRIPT><NOSCRIPT></NOSCRIPT></HEAD><BODY BGCOLOR="white" onload="windowTitle();"><!-- ========= START OF TOP NAVBAR ======= --><A NAME="navbar_top"><!-- --></A><A HREF="#skip-navbar_top" title="Skip navigation links"></A><TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY=""><TR><TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1"><A NAME="navbar_top_firstrow"><!-- --></A><TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY=""> <TR ALIGN="center" VALIGN="top"> <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A> </TD> <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-summary.html"><FONT CLASS="NavBarFont1"><B>Package</B></FONT></A> </TD> <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> <FONT CLASS="NavBarFont1Rev"><B>Class</B></FONT> </TD> <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A> </TD> <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A> </TD> <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A> </TD> <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A> </TD> <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="http://www.cs.waikato.ac.nz/ml/weka/" 
target="_blank"><FONT CLASS="NavBarFont1"><B>Weka's home</B></FONT></A> </TD> </TR></TABLE></TD><TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM></EM></TD></TR><TR><TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> <A HREF="../../../weka/core/tokenizers/CharacterDelimitedTokenizer.html" title="class in weka.core.tokenizers"><B>PREV CLASS</B></A> <A HREF="../../../weka/core/tokenizers/Tokenizer.html" title="class in weka.core.tokenizers"><B>NEXT CLASS</B></A></FONT></TD><TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> <A HREF="../../../index.html?weka/core/tokenizers/NGramTokenizer.html" target="_top"><B>FRAMES</B></A> <A HREF="NGramTokenizer.html" target="_top"><B>NO FRAMES</B></A> <SCRIPT type="text/javascript"> <!-- if(window==top) { document.writeln('<A HREF="../../../allclasses-noframe.html"><B>All Classes</B></A>'); } //--></SCRIPT><NOSCRIPT> <A HREF="../../../allclasses-noframe.html"><B>All Classes</B></A></NOSCRIPT></FONT></TD></TR><TR><TD VALIGN="top" CLASS="NavBarCell3"><FONT SIZE="-2"> SUMMARY: NESTED | FIELD | <A HREF="#constructor_summary">CONSTR</A> | <A HREF="#method_summary">METHOD</A></FONT></TD><TD VALIGN="top" CLASS="NavBarCell3"><FONT SIZE="-2">DETAIL: FIELD | <A HREF="#constructor_detail">CONSTR</A> | <A HREF="#method_detail">METHOD</A></FONT></TD></TR></TABLE><A NAME="skip-navbar_top"></A><!-- ========= END OF TOP NAVBAR ========= --><HR><!-- ======== START OF CLASS DATA ======== --><H2><FONT SIZE="-1">weka.core.tokenizers</FONT><BR>Class NGramTokenizer</H2><PRE>java.lang.Object <IMG SRC="../../../resources/inherit.gif" ALT="extended by "><A HREF="../../../weka/core/tokenizers/Tokenizer.html" title="class in weka.core.tokenizers">weka.core.tokenizers.Tokenizer</A> <IMG SRC="../../../resources/inherit.gif" ALT="extended by "><A HREF="../../../weka/core/tokenizers/CharacterDelimitedTokenizer.html" title="class in weka.core.tokenizers">weka.core.tokenizers.CharacterDelimitedTokenizer</A> <IMG SRC="../../../resources/inherit.gif" 
ALT="extended by "><B>weka.core.tokenizers.NGramTokenizer</B></PRE><DL><DT><B>All Implemented Interfaces:</B> <DD>java.io.Serializable, java.util.Enumeration, <A HREF="../../../weka/core/OptionHandler.html" title="interface in weka.core">OptionHandler</A>, <A HREF="../../../weka/core/RevisionHandler.html" title="interface in weka.core">RevisionHandler</A></DD></DL><HR><DL><DT><PRE>public class <B>NGramTokenizer</B><DT>extends <A HREF="../../../weka/core/tokenizers/CharacterDelimitedTokenizer.html" title="class in weka.core.tokenizers">CharacterDelimitedTokenizer</A></DL></PRE><P><!-- globalinfo-start --> Splits a string into an n-gram with min and max grams. <p/> <!-- globalinfo-end --> <!-- options-start --> Valid options are: <p/> <pre> -delimiters &lt;value&gt; The delimiters to use (default ' \r\n\t.,;:'"()?!').</pre> <pre> -max &lt;int&gt; The max size of the Ngram (default = 3).</pre> <pre> -min &lt;int&gt; The min size of the Ngram (default = 1).</pre> <!-- options-end --><P><P><DL><DT><B>Version:</B></DT> <DD>$Revision: 1.4 $</DD><DT><B>Author:</B></DT> <DD>Sebastian Germesin (sebastian.germesin@dfki.de), FracPete (fracpete at waikato dot ac dot nz)</DD><DT><B>See Also:</B><DD><A HREF="../../../serialized-form.html#weka.core.tokenizers.NGramTokenizer">Serialized Form</A></DL><HR><P><!-- ======== CONSTRUCTOR SUMMARY ======== --><A NAME="constructor_summary"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2"><B>Constructor Summary</B></FONT></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#NGramTokenizer()">NGramTokenizer</A></B>()</CODE><BR> </TD></TR></TABLE> <!-- ========== METHOD SUMMARY =========== --><A NAME="method_summary"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" 
COLSPAN="2"><FONT SIZE="+2"><B>Method Summary</B></FONT></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> int</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#getNGramMaxSize()">getNGramMaxSize</A></B>()</CODE><BR> Gets the max N of the NGram.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> int</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#getNGramMinSize()">getNGramMinSize</A></B>()</CODE><BR> Gets the min N of the NGram.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.String[]</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#getOptions()">getOptions</A></B>()</CODE><BR> Gets the current option settings for the OptionHandler.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#getRevision()">getRevision</A></B>()</CODE><BR> Returns the revision string.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#globalInfo()">globalInfo</A></B>()</CODE><BR> Returns a string describing the tokenizer.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> boolean</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#hasMoreElements()">hasMoreElements</A></B>()</CODE><BR> Returns true if there are more elements available.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT 
SIZE="-1"><CODE> java.util.Enumeration</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#listOptions()">listOptions</A></B>()</CODE><BR> Returns an enumeration of all the available options.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>static void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#main(java.lang.String[])">main</A></B>(java.lang.String[] args)</CODE><BR> Runs the tokenizer with the given options and strings to tokenize.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.Object</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#nextElement()">nextElement</A></B>()</CODE><BR> Returns N-grams and also (N-1)-grams and ....</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#NGramMaxSizeTipText()">NGramMaxSizeTipText</A></B>()</CODE><BR> Returns the tip text for this property.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#NGramMinSizeTipText()">NGramMinSizeTipText</A></B>()</CODE><BR> Returns the tip text for this property.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#setNGramMaxSize(int)">setNGramMaxSize</A></B>(int value)</CODE><BR> Sets the max size of the Ngram.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> 
void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#setNGramMinSize(int)">setNGramMinSize</A></B>(int value)</CODE><BR> Sets the min size of the Ngram.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#setOptions(java.lang.String[])">setOptions</A></B>(java.lang.String[] options)</CODE><BR> Parses a given list of options.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../weka/core/tokenizers/NGramTokenizer.html#tokenize(java.lang.String)">tokenize</A></B>(java.lang.String s)</CODE><BR> Sets the string to tokenize.</TD></TR></TABLE> <A NAME="methods_inherited_from_class_weka.core.tokenizers.CharacterDelimitedTokenizer"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"><TH ALIGN="left"><B>Methods inherited from class weka.core.tokenizers.<A HREF="../../../weka/core/tokenizers/CharacterDelimitedTokenizer.html" title="class in weka.core.tokenizers">CharacterDelimitedTokenizer</A></B></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><A HREF="../../../weka/core/tokenizers/CharacterDelimitedTokenizer.html#delimitersTipText()">delimitersTipText</A>, <A HREF="../../../weka/core/tokenizers/CharacterDelimitedTokenizer.html#getDelimiters()">getDelimiters</A>, <A HREF="../../../weka/core/tokenizers/CharacterDelimitedTokenizer.html#setDelimiters(java.lang.String)">setDelimiters</A></CODE></TD></TR></TABLE> <A NAME="methods_inherited_from_class_weka.core.tokenizers.Tokenizer"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"><TH ALIGN="left"><B>Methods inherited from class 
weka.core.tokenizers.<A HREF="../../../weka/core/tokenizers/Tokenizer.html" title="class in weka.core.tokenizers">Tokenizer</A></B></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><A HREF="../../../weka/core/tokenizers/Tokenizer.html#runTokenizer(weka.core.tokenizers.Tokenizer, java.lang.String[])">runTokenizer</A>, <A HREF="../../../weka/core/tokenizers/Tokenizer.html#tokenize(weka.core.tokenizers.Tokenizer, java.lang.String[])">tokenize</A></CODE></TD></TR></TABLE> <A NAME="methods_inherited_from_class_java.lang.Object"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"><TH ALIGN="left"><B>Methods inherited from class java.lang.Object</B></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</CODE></TD></TR></TABLE> <P><!-- ========= CONSTRUCTOR DETAIL ======== --><A NAME="constructor_detail"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" COLSPAN="1"><FONT SIZE="+2"><B>Constructor Detail</B></FONT></TH></TR></TABLE><A NAME="NGramTokenizer()"><!-- --></A><H3>NGramTokenizer</H3><PRE>public <B>NGramTokenizer</B>()</PRE>
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?