tokenize.html

来自「java类库详细讲解」· HTML 代码 · 共 224 行

HTML
224
字号
<HTML>
<HEAD>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<TITLE>Parsing a String into Tokens Using a Regular Expression
(Java Developers Almanac Example)
</TITLE>
<META CONTENT="Patrick Chan" NAME="AUTHOR">
<META CONTENT="Code Examples from The Java Developers Almanac 1.4" NAME="DESCRIPTION">
<META CONTENT="Addison-Wesley/Patrick Chan" NAME="OWNER">
<META CONTENT="3/20/02" NAME="revision">
<STYLE TYPE="text/css">
<!--     BODY CODE  {font-family: Courier, Monospace;           font-size: 11pt}    TABLE, BODY          {font-family: Verdana, Arial, Helvetica, sans-serif;           font-size: 10pt}    PRE   {font-family: Courier, Monospace;           font-size: 10pt}    H3    {font-family: Verdana, Arial, Helvetica, sans-serif;           font-size: 11pt}    A.eglink {text-decoration: none}    A:hover.eglink {text-decoration: underline}    -->
</STYLE>
</HEAD>
<BODY>
<TABLE CELLSPACING="0" CELLPADDING="0" BORDER="0">
<TR>
<TD rowspan="3"><A HREF="/?l=ex"><IMG BORDER="0" ALIGN="BOTTOM" HSPACE="10" SRC="/egs/almanac14a.jpg" ALT="The Java Developers Almanac 1.4"></A></TD><TD VALIGN="top"><font face="Times" size="6"><b>The Java Developers Almanac 1.4</b></font>
<br>
        Order this book from <a href="/cgi-bin/scripts/redirect.pl?l=ex&amp;url=http://www.amazon.com/exec/obidos/ASIN/0201752808/xeo">Amazon</a>.
    </TD>
</TR>
<TR>
<TD align="right" valign="bottom">
<FORM method="get" action="/cgi-bin/search/find.pl">
<INPUT size="25" name="words" type="text"><INPUT value="Search" type="submit">
</FORM>
</TD>
</TR>
</TABLE>
<HR color="#6666cc">
<DIV ALIGN="LEFT">
<A HREF="/">Home</A>
    &gt;
    <A HREF="../index.html">List of Packages</A>
    &gt;
    <B><A HREF="../java.util.regex/pkg.html">java.util.regex</A></B><font color="#666666" SIZE="-2">
        &nbsp;[26 examples]
        </font>
        &gt;
        <B><A HREF="../java.util.regex/pkg.html#Tokenizing">Tokenizing</A></B><font color="#666666" SIZE="-2">
            &nbsp;[2 examples]
            </font>
</DIV><P>
  <h3>
    e432.  
    Parsing a String into Tokens Using a Regular Expression</h3>

This example implements a tokenizer that uses regular expressions.
The use of this tokenizer is similar to the <code>StringTokenizer</code> class in
that you use it like an iterator to extract the tokens.


<pre>
    // Example: walk the input, treating every single lowercase letter as a token.
    CharSequence inputStr = <font color="#0066ff"><i>"a 1 2 b c 3 4"</i></font>;
    String patternStr = <font color="#0066ff"><i>"[a-z]"</i></font>;
    
    // Set to false if only the tokens that match the pattern are to be returned.
    // If true, the text between matching tokens is also returned.
    boolean returnDelims = <font color="#0066ff"><i>true</i></font>;
    
    // Create the tokenizer
    Iterator tokenizer = new RETokenizer(inputStr, patternStr, returnDelims);
    
    // Get the tokens (and delimiters). hasNext() must be called before each
    // next(); it is what locates the following token.
    for (; tokenizer.hasNext(); ) {
        String tokenOrDelim = (String)tokenizer.next();
    }
    // Values returned, in order (the leading "" is the empty delimiter in
    // front of the first token; " 3 4" is the text after the last token):
    // "", "a", " 1 2 ", "b", " ", "c", " 3 4"
    
    // Tokenizer that walks a character sequence with a regular expression and
    // hands out the matches (tokens) one at a time, in the style of an Iterator.
    // When returnDelims is true, the text between matches is handed out as well.
    class RETokenizer implements Iterator {
        // Holds the original input to search for tokens
        private final CharSequence input;

        // If true, the text between tokens is returned as well
        private final boolean returnDelims;

        // Used to find tokens. Set to null as soon as find() fails, so that
        // find() is never called again after a failed search (the Matcher
        // contract allows such a call to start over from the beginning).
        private Matcher matcher;

        // The pending delimiter. If non-null, it is returned by the next call
        // to next(). May be the empty string.
        private String delim;

        // The pending token. If non-null and delim is null, it is returned by
        // the next call to next().
        private String match;

        // The value of matcher.end() from the last successful match, or the
        // input length once the trailing delimiter has been produced.
        private int lastEnd = 0;

        // patternStr is a regular expression pattern that identifies tokens.
        // If returnDelims is false, only the text matched by the pattern is
        // returned. If returnDelims is true, the text between matches is
        // returned too, in the sequence delimiter, token, delimiter, token,
        // etc., followed by any non-empty text after the last token. A
        // delimiter in front of a token may be the empty string. A token is
        // empty only if the pattern itself can match the empty string.
        public RETokenizer(CharSequence input, String patternStr, boolean returnDelims) {
            // Save values
            this.input = input;
            this.returnDelims = returnDelims;

            // Compile pattern and prepare input
            Pattern pattern = Pattern.compile(patternStr);
            matcher = pattern.matcher(input);
        }

        // Returns true if there are more tokens or delimiters. Calling this
        // method repeatedly without calling next() does not skip anything.
        public boolean hasNext() {
            // A value that was located earlier is still waiting to be
            // returned. This check must come before the exhausted check below,
            // otherwise a pending trailing delimiter would be reported as
            // absent.
            if (delim != null || match != null) {
                return true;
            }

            // The input was exhausted by an earlier call
            if (matcher == null) {
                return false;
            }

            if (matcher.find()) {
                if (returnDelims) {
                    delim = input.subSequence(lastEnd, matcher.start()).toString();
                }
                match = matcher.group();
                lastEnd = matcher.end();
                return true;
            }

            // No more tokens. Drop the matcher in every case, not only when a
            // trailing delimiter exists, so that no later call can search again.
            matcher = null;

            if (returnDelims) {
                // lastEnd never exceeds the input length, so "not equal"
                // means there is text left after the last token.
                int inputEnd = input.length();
                if (lastEnd != inputEnd) {
                    delim = input.subSequence(lastEnd, inputEnd).toString();
                    lastEnd = inputEnd;
                }
            }
            return delim != null;
        }

        // Returns the next token (or delimiter if returnDelims is true) as a
        // String. Works whether or not hasNext() was called first. Throws
        // java.util.NoSuchElementException when nothing is left, as required
        // by the Iterator contract.
        public Object next() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }

            String result;
            if (delim != null) {
                // A delimiter always precedes the token found with it
                result = delim;
                delim = null;
            } else {
                result = match;
                match = null;
            }
            return result;
        }

        // Returns true if the call to next() will return a token rather
        // than a delimiter. Returns false when nothing is left.
        public boolean isNextToken() {
            if (!hasNext()) {
                return false;
            }
            // hasNext() guarantees that something is pending; with no pending
            // delimiter it has to be a token.
            return delim == null;
        }

        // Not supported.
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
</pre>
<P><table width="600" CELLSPACING="0" CELLPADDING="2" BORDER="0">
<tr>
<td bgcolor="#6666cc" align="center"><font color="#ffffff">
            &nbsp;Related Examples
        </font></td>
</tr>
</table>


e431. <a class="eglink" href="ParseLine.html?l=rel">
    Parsing Character-Separated Data with a Regular Expression
</a>
<br>


<table width="600" CELLSPACING="0" CELLPADDING="2" BORDER="0">
<tr>
<td align="left">
<br>
        See also: 
<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Flags">
    Flags
</a>&nbsp;&nbsp;

<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Groups">
    Groups
</a>&nbsp;&nbsp;

<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Lines">
    Lines
</a>&nbsp;&nbsp;

<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Paragraphs">
    Paragraphs
</a>&nbsp;&nbsp;

<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Searching%20and%20Replacing">
    Searching and Replacing
</a>&nbsp;&nbsp;

</td>
</tr>
</table>

<br>

<br>
<FONT FACE="Verdana, Arial, Helvetica, sans-serif" SIZE="0">
&copy; 2002 Addison-Wesley.
</FONT>
</BODY>
</HTML>

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?