tokenize.html
来自「java类库详细讲解」· HTML 代码 · 共 224 行
HTML
224 行
<HTML>
<HEAD>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<TITLE>Parsing a String into Tokens Using a Regular Expression
(Java Developers Almanac Example)
</TITLE>
<META CONTENT="Patrick Chan" NAME="AUTHOR">
<META CONTENT="Code Examples from The Java Developers Almanac 1.4" NAME="DESCRIPTION">
<META CONTENT="Addison-Wesley/Patrick Chan" NAME="OWNER">
<META CONTENT="3/20/02" NAME="revision">
<STYLE TYPE="text/css">
<!-- BODY CODE {font-family: Courier, Monospace; font-size: 11pt} TABLE, BODY {font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 10pt} PRE {font-family: Courier, Monospace; font-size: 10pt} H3 {font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 11pt} A.eglink {text-decoration: none} A:hover.eglink {text-decoration: underline} -->
</STYLE>
</HEAD>
<BODY>
<TABLE CELLSPACING="0" CELLPADDING="0" BORDER="0">
<TR>
<TD rowspan="3"><A HREF="/?l=ex"><IMG BORDER="0" ALIGN="BOTTOM" HSPACE="10" SRC="/egs/almanac14a.jpg"></A></TD><TD VALIGN="top"><font face="Times" size="6"><b>The Java Developers Almanac 1.4</b></font>
<br>
Order this book from <a href="/cgi-bin/scripts/redirect.pl?l=ex&amp;url=http://www.amazon.com/exec/obidos/ASIN/0201752808/xeo">Amazon</a>.
</TD>
</TR>
<TR>
<TD align="right" valign="bottom">
<FORM method="get" action="/cgi-bin/search/find.pl">
<INPUT size="25" name="words" type="text"><INPUT value="Search" type="submit">
</FORM>
</TD>
</TR>
</TABLE>
<HR color="#6666cc">
<DIV ALIGN="LEFT">
<A HREF="/">Home</A>
>
<A HREF="../index.html">List of Packages</A>
>
<B><A HREF="../java.util.regex/pkg.html">java.util.regex</A></B><font color="#666666" SIZE="-2">
[26 examples]
</font>
>
<B><A HREF="../java.util.regex/pkg.html#Tokenizing">Tokenizing</A></B><font color="#666666" SIZE="-2">
[2 examples]
</font>
</DIV><P>
<h3>
e432.
Parsing a String into Tokens Using a Regular Expression</h3>
This example implements a tokenizer that uses regular expressions.
The use of this tokenizer is similar to the <code>StringTokenizer</code> class in
that you use it like an iterator to extract the tokens.
<pre>
CharSequence inputStr = <font color="#0066ff"><i>"a 1 2 b c 3 4"</i></font>;
String patternStr = <font color="#0066ff"><i>"[a-z]"</i></font>;
// Set to false if only the tokens that match the pattern are to be returned.
// If true, the text between matching tokens is also returned.
boolean returnDelims = <font color="#0066ff"><i>true</i></font>;
// Create the tokenizer
Iterator tokenizer = new RETokenizer(inputStr, patternStr, returnDelims);
// Get the tokens (and delimiters); each element is a String
for (; tokenizer.hasNext(); ) {
String tokenOrDelim = (String)tokenizer.next();
}
// With returnDelims = true the loop yields, in order (note the leading empty
// delimiter before "a" and the trailing delimiter after "c"):
// "", "a", " 1 2 ", "b", " ", "c", " 3 4"
// An iterator that splits a character sequence into tokens, where a token is
// any region of the input that matches a regular expression. Optionally the
// text between tokens (the "delimiters") is returned as well.
//
// Every element returned by next() is a String. The look-ahead is performed
// lazily by hasNext(), which may be called any number of times without
// consuming anything; next() and isNextToken() perform the look-ahead
// themselves, so calling hasNext() first is optional.
class RETokenizer implements Iterator {
    // Holds the original input to search for tokens
    private final CharSequence input;

    // Used to find tokens. Set to null once the search has failed so that
    // the matcher is never queried again after the input is exhausted.
    private Matcher matcher;

    // If true, the text between tokens is returned as well
    private final boolean returnDelims;

    // The pending delimiter. If non-null, it is returned by the next call
    // to next(), ahead of any pending match.
    private String delim;

    // The pending matched token. If non-null and delim is null, it is
    // returned by the next call to next().
    private String match;

    // The value of matcher.end() from the last successful match
    // (0 before the first match).
    private int lastEnd = 0;

    // patternStr is a regular expression pattern that identifies tokens.
    // If returnDelims is false, only the tokens that match the pattern are
    // returned. If returnDelims is true, the text between matching tokens
    // is also returned, in the sequence delimiter, token, delimiter, token,
    // etc., followed by any non-empty text after the last token.
    // Delimiters may be the empty string (for example when the input starts
    // with a token or two tokens are adjacent). Tokens are non-empty as
    // long as the pattern itself cannot match the empty string.
    //
    // Pattern.compile() throws PatternSyntaxException if patternStr is not
    // a valid regular expression.
    public RETokenizer(CharSequence input, String patternStr, boolean returnDelims) {
        // Save values
        this.input = input;
        this.returnDelims = returnDelims;

        // Compile pattern and prepare input
        Pattern pattern = Pattern.compile(patternStr);
        matcher = pattern.matcher(input);
    }

    // Returns true if there are more tokens or delimiters. Calling this
    // method repeatedly without calling next() always gives the same answer.
    public boolean hasNext() {
        // A value buffered by an earlier look-ahead is still waiting. This
        // must be checked before the exhausted test below, because the
        // trailing delimiter is buffered at the same moment the matcher
        // is dropped.
        if (delim != null || match != null) {
            return true;
        }

        // The search has already failed once; nothing more can be found.
        if (matcher == null) {
            return false;
        }

        if (matcher.find()) {
            if (returnDelims) {
                // Text between the previous token and this one (may be "")
                delim = input.subSequence(lastEnd, matcher.start()).toString();
            }
            match = matcher.group();
            lastEnd = matcher.end();
        } else {
            if (returnDelims && lastEnd < input.length()) {
                // Text after the last token, returned as a final delimiter
                delim = input.subSequence(lastEnd, input.length()).toString();
                lastEnd = input.length();
            }
            // Drop the matcher in every exhausted case so find() is never
            // called again after it has failed (the original author noted
            // that the matcher appeared to restart once it reached the end).
            matcher = null;
        }
        return delim != null || match != null;
    }

    // Returns the next token (or delimiter if returnDelims is true) as a
    // String. Throws java.util.NoSuchElementException when there is nothing
    // left, as required by the Iterator contract.
    public Object next() {
        // Perform the look-ahead here so that callers are not forced to
        // call hasNext() before every next().
        if (!hasNext()) {
            throw new java.util.NoSuchElementException();
        }

        String result;
        if (delim != null) {
            // A delimiter always precedes the token found with it
            result = delim;
            delim = null;
        } else {
            result = match;
            match = null;
        }
        return result;
    }

    // Returns true if the call to next() will return a token rather
    // than a delimiter. Returns false when nothing is left.
    public boolean isNextToken() {
        // hasNext() guarantees that delim or match is non-null, so a null
        // delim means the pending value is a token.
        return hasNext() && delim == null;
    }

    // Not supported.
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
</pre>
<P><table width="600" CELLSPACING="0" CELLPADDING="2" BORDER="0">
<tr>
<td bgcolor="#6666cc" align="center"><font color="#ffffff">
Related Examples
</font></td>
</tr>
</table>
e431. <a class="eglink" href="ParseLine.html?l=rel">
Parsing Character-Separated Data with a Regular Expression
</a>
<br>
<table width="600" CELLSPACING="0" CELLPADDING="2" BORDER="0">
<tr>
<td align="left">
<br>
See also:
<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Flags">
Flags
</a>
<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Groups">
Groups
</a>
<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Lines">
Lines
</a>
<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Paragraphs">
Paragraphs
</a>
<a class="eglink" href="/egs/java.util.regex/pkg.html?l=rel#Searching%20and%20Replacing">
Searching and Replacing
</a>
</td>
</tr>
</table>
<br>
<br>
<FONT FACE="Verdana, Arial, Helvetica, sans-serif" SIZE="0">
© 2002 Addison-Wesley.
</FONT>
</BODY>
</HTML>
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?