📄 ltstreamtokenizer.java
字号:
package jboost.tokenizer;import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import jboost.monitor.Monitor;import jboost.util.Util;/** Gets tokens from a string. Differs from Java's StringTokenizer as follows. Disadvantages: Allows only one token terminator. (StringTokenizer allows ranges.) Has less functionality Neutral: Every token must have a terminator (including the last) (In StringTokenizer the terminators are actually separators. See rest() method for getting the characters past last terminator.) Advantages: Terminator can be any string so long as it is not null, empty, or cotains the escape character (default is \), //, /*, or * / (StringTokenizer supports only single-character terminators.) Provides a way of expressing the terminator in text. The escape immediately before the terminator escapes the terminator and causes the terminator to be interpreted literally. To write the escape char (or several) immediately before the terminator, double each. All other appearances of the escape char are interpreted literally. For example, if the terminator is "end" then "\end" will result in "end" appearing as part of the token. "\\end" will result in "\" ending the token (as end is the token terminator), "\\\end" will result in "\end" appearing as part of the token, \\\\end will result in \\ ending the token, etc.*/public class LTStreamTokenizer { private BufferedReader br; private String terminator; // token terminators private int terLen, minLen; // terminator and min lenth of ter, //, /*, */ private int terLines; // number of newlines in terminator private char escape; // char escaping terminator private StringBuffer strBuf= new StringBuffer(); // part of stream that hasn't been returned yet private int strLen= 0; // stringBuffer length private int firstLineNum= 0; // line # where current token begins (>=0) private int lastLineNum= 0; // line # where current token ends (>=0) private boolean disallowComments= false; // comment marks are meaningless /** constructor @param string the whole string @param terminator token terminator, cannot contain backslashes @param escape character used to escape special symbols */ public LTStreamTokenizer( BufferedReader br, String terminator, char escape) { terminator(terminator); escape(escape); this.br= br; } public LTStreamTokenizer(BufferedReader br, String terminator) { this(br, terminator, '\\'); } /** Sets the terminator */ public void terminator(String terminator) { testTerminator(terminator); this.terminator= terminator; terLen= terminator.length(); minLen= terLen >= 2 ? 2 : terLen; for (int i= 0, terLines= 0; i < terLen; i++) if (terminator.charAt(i) == '\n') terLines++; } /** tests if value can be a terminator */ private void testTerminator(String value) { if (value == null || value.equals("") || value.indexOf(escape) != -1 || value.indexOf("//") != -1 || value.indexOf("/*") != -1 || value.indexOf("*/") != -1) throw new IllegalArgumentException( "LTStreamTokenizer: " + "terminator=<" + value + "> is either null, empty, or contains //, /*, or */."); } /** Sets the escape char */ public void escape(char escape) { if (terminator.indexOf(escape) == -1) this.escape= escape; else throw new IllegalArgumentException( "LTStreamTokenizer: " + "escape=" + escape + "appears in terminator."); } public void disallowComments(boolean value) { disallowComments= value; } /** Returns next token, empty string if terminator appears at the beginning, null string if no more terminators. Handles backslashes preceding the token terminator. */ public String next() { return next(false); } /** Returns next token. Empty string if terminator appears at the beginning. If returnPartial is false, behaves exactly as next(): returns null if no more terminators. If returnPartial is true, returns partial token if reaches EOF and returns null only if there is nothing after last terminator. Handles backslashes preceding the token terminator */ public String next(boolean returnPartial) { String curTok= ""; // current token, contains the processed part of // strBuf (after eliminating escapes) int toCopy= 0; // beginning of string not yet copied to curTok int i= 0; // location currently processed int numEscapes; // number of trailing escapes int endComInd; boolean ongoingComment= false; // in ongoing comment region boolean lineComment= false; // in line comment region int numRead= 0; // number of characters read from file final int bufLen= 1000; // read bufLen characters at a time char[] cBuf= new char[bufLen]; // contains characters read from file firstLineNum= lastLineNum; while (true) { // if(Monitor.logLevel>3) Monitor.log("strLen=" + strLen + " toCopy=" + toCopy + " i=" + i); if (i > strLen - minLen) { try { numRead= br.read(cBuf, 0, bufLen); } catch (IOException e) { if (Monitor.logLevel > 3) Monitor.log( "LTStreamTokenizer.next(): " + "IO exception: " + e.getMessage()); e.printStackTrace(); } if (numRead == -1) { // no more data if (strLen != 0 && returnPartial) { curTok += strBuf.substring(toCopy, strLen); strBuf.delete(0, strLen); // necessary? toCopy= i= strLen= 0; // necessary? return curTok; } else // nothing after last token or return null; // don't want partial tokens } // make sure we increase line number strBuf.append(cBuf, 0, numRead); strLen += numRead; } else if (ongoingComment) { // ongoing comment if ((endComInd= StringOp.indexOf(strBuf, i, "*/")) != -1) { // ongoing comment ends in current buffer? lastLineNum += numNewLines(i, endComInd); numEscapes= numEscapes(strBuf.substring(0, endComInd), escape); if (Util.even(numEscapes)) // ends! ongoingComment= false; toCopy= i= endComInd + 2; // whether or not it ends } else { // ongoing comment doesn't end in current buffer lastLineNum += numNewLines(i, strLen); toCopy= i= strLen - numEscapes(strBuf.substring(0, strLen), escape); } } else if (lineComment) { // line comment if ((endComInd= StringOp.indexOf(strBuf, i, "\n")) != -1) { // line ends in current buffer lastLineNum++; lineComment= false; toCopy= i= endComInd + 1; } else // line doesn't end in current buffer toCopy= i= strLen; } else if ( !disallowComments && i <= strLen - 2 && strBuf.substring(i, i + 2).equals("/*")) { // ongoing comment starts? numEscapes= numEscapes(strBuf.substring(0, i), escape); if (Util.even(numEscapes)) { // onging comment starts! curTok += strBuf.substring(toCopy, i - numEscapes / 2); ongoingComment= true; } else { // escaped, part of token curTok += strBuf.substring(toCopy, i - (numEscapes + 1) / 2) + "/*"; toCopy= i + 2; } i += 2; } else if ( !disallowComments && i <= strLen - 2 && strBuf.substring(i, i + 2).equals("//")) { // line comment starts? numEscapes= numEscapes(strBuf.substring(0, i), escape); if (Util.even(numEscapes)) { // line comment starts! curTok += strBuf.substring(toCopy, i - numEscapes / 2); ongoingComment= true; } else // escpaed, part of token curTok += strBuf.substring(toCopy, i - (numEscapes + 1) / 2) + "//"; toCopy= i + 2; } else if ( i <= strLen - terLen && strBuf.substring(i, i + terLen).equals(terminator)) { // terminator? numEscapes= numEscapes(strBuf.substring(0, i), escape); lastLineNum += terLines; if (Util.even(numEscapes)) { // terminator! curTok += strBuf.substring(toCopy, i - (numEscapes) / 2); toCopy= i= i + terLen; strBuf.delete(0, toCopy); // necessary? strLen -= toCopy; return curTok; } else { // escaped curTok += strBuf.substring(toCopy, i - (numEscapes + 1) / 2) + terminator; toCopy= i= i + terLen; } } else { if (strBuf.charAt(i) == '\n') lastLineNum++; i++; } } } /** Returns rest of string Handles backslashes preceding the token terminator Normally used for the part remaining after last token, so no such conversions will take place. */ public String rest() { String curTok; // current token int holdLineNum= lastLineNum; // holds line number where rest starts String temp= ""; while ((curTok= next(true)) != null) temp += curTok; firstLineNum= holdLineNum; // restore first line of rest return temp; } /** Returns line number where current token begins */ public int firstLineNum() { return firstLineNum; } /** Returns line number where current token ends */ public int lastLineNum() { return lastLineNum; } /** Returns the number of contiguous backslashes starting at a location >= beg and ending in location exactly end-1 of string. (end-1 uses the same convention as String.substring) Example: numEscapes("\b\\a", beg, end) is 0 when end=2 or 5 it is 2 for beg=0,1,2 and end=3 and 1 for beg=3 and end=3. @param string a string @param beg first index considered @param end one after last index considered @return number of contiguous backslashes starting at location >= beg and ending in location end of string. */ private int numEscapes(String string, char escape) { int i= string.length(); int numEscapes= 0; while (--i >= 0 && string.charAt(i) == escape) numEscapes++; return numEscapes; } /** @param from start location @param to one more than last location @return number of new lines in strBuf at locations satisfying from<=location<to */ private int numNewLines(int from, int to) { int numNewLines= 0; for (int i= from; i < to; i++) if (strBuf.charAt(i) == '\n') numNewLines++; return numNewLines; } public String toString() { return "Contents of LTStreamTokenizer:" + " terminator=<" + terminator + ">" + " escape=<" + escape + ">" + " strBuf=<" + strBuf + ">" + " firstLineNum=" + firstLineNum + " lastLineNum=" + firstLineNum; } /** Tests LTStreamTokenizer. Three modes: Without arguments, the string is "Hello brave new world\nHow's life?" and the terminator " ". With one argument which is a file name, the terminator is the first line of the file, and the string is the rest. With one argument which is -i, prompts for string and terminator */ public static void main(String[] args) { try { mainCore(args); } catch (IOException e) { if (Monitor.logLevel > 3) Monitor.log("IO exception: " + e.getMessage()); e.printStackTrace(); } catch (RuntimeException e) { if (Monitor.logLevel > 3) Monitor.log("Runtime exception: " + e.getMessage()); e.printStackTrace(); } } public static void mainCore(String[] args) throws IOException { String string; String terminator; if (Monitor.logLevel > 3) Monitor.log("Testing LTStreamTokenizer."); if (args.length != 1) // wrong number of arguments throw new RuntimeException("Usage: LTStreamTokenizer <filnename>"); BufferedReader br= new BufferedReader(new FileReader(args[0])); terminator= br.readLine(); if (Monitor.logLevel > 3) Monitor.log( "Token terminator is: <" + terminator + ">" + " tokenizing rest of file."); LTStreamTokenizer sst= new LTStreamTokenizer(br, terminator); if (Monitor.logLevel > 3) Monitor.log("Tokens:"); String newToken; while ((newToken= sst.next()) != null) if (Monitor.logLevel > 3) Monitor.log( "<" + newToken + ">" + " lines " + sst.firstLineNum() + " to " + sst.lastLineNum()); if (Monitor.logLevel > 3) Monitor.log( " Rest: <" + sst.rest() + ">" + " lines " + sst.firstLineNum() + " to " + sst.lastLineNum()); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -