📄 str.java

📁 java写的crawler
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Copyright (c) 1998-2002 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
package rcm.util;

import java.util.StringTokenizer;

/**
 * String utility routines.
 */
public abstract class Str {

    /**
     * Locates the earliest character of a string that belongs to a given
     * set of characters, scanning the whole string.
     * @param subject String in which to search
     * @param chars Characters to search for
     * @return index of the first character of subject that also appears in
     * chars, or -1 if subject contains none of them.
     */
    public static int indexOfAnyChar (String subject, String chars) {
        // Searching the whole string is the three-argument search from offset 0.
        final int fromBeginning = 0;
        return indexOfAnyChar (subject, chars, fromBeginning);
    }

    /**
     * Find first occurrence of any of a set of characters, starting
     * at a specified index.
* @param subject String in which to search     * @param chars Characters to search for     * @param start Starting offset to search from     * @return index of first occurence (after start) in subject of a character from chars,     * or -1 if no match.     */    public static int indexOfAnyChar (String subject, String chars, int start) {        for (int i=start; i<subject.length(); ++i)            if (chars.indexOf (subject.charAt (i)) != -1)                return i;        return -1;    }    /**     * Replace all occurences of a string.     * @param subject String in which to search     * @param original String to search for in subject     * @param replacement String to substitute     * @return subject with all occurences of original replaced by replacement     */    public static String replace (String subject, String original, String replacement) {        StringBuffer output = new StringBuffer ();        int p = 0;        int i;        while ((i = subject.indexOf (original, p)) != -1) {            output.append (subject.substring (p, i));            output.append (replacement);            p = i + original.length();        }        if (p < subject.length ())            output.append (subject.substring(p));        return output.toString ();    }    /**     * Escapes metacharacters in a string.     * @param subject String in which metacharacters are to be escaped     * @param escapeChar the escape character (e.g., \)     * @param metachars the metacharacters that should be escaped     * @return subject with escapeChar inserted before every character found in metachars     */    public static String escape (String subject, char escapeChar, String metachars) {        return escape (subject, metachars, escapeChar, metachars);    }    /**     * Escapes characters in a string.     * @param subject String in which metacharacters are to be escaped     * @param chars Characters that need to be escaped (e.g. 
"\b\t\r\n\\")     * @param escapeChar the escape character (e.g., '\\')     * @param metachars escape code letters corresponding to each letter in chars (e.g. "btrn\\")     *    <B>Must have metachars.length () == chars.length().</B>     * @return subject where every occurence of c in chars is replaced     * by escapeChar followed the character corresponding to c in metachars.     *     */    public static String escape (String subject, String chars, char escapeChar, String metachars) {        StringBuffer output = new StringBuffer ();        int p = 0;        int i;        while ((i = indexOfAnyChar (subject, chars, p)) != -1) {            output.append (subject.substring (p, i));            char c = subject.charAt (i); // character that needs escaping            int k = chars.indexOf (c);            char metac = metachars.charAt (k);   // its corresponding metachar            output.append (escapeChar);            output.append (metac);            p = i + 1;        }        if (p < subject.length ())            output.append (subject.substring(p));        return output.toString ();    }    /**     * Translate escape sequences (e.g. \r, \n) to characters.     * @param subject String in which metacharacters are to be escaped     * @param escapeChar the escape character (e.g., \)     * @param metachars letters representing escape codes (typically "btrn\\")     * @param chars characters corresponding to metachars (typically "\b\t\r\n\\").     *    <B>Must have chars.length () == metachars.length().</B>     * @param keepUntranslatedEscapes Controls behavior on unknown escape sequences     * (see below).     * @return subject where every escapeChar followed by c in metachars     * is replaced by the character corresponding to c in chars.  If an escape     * sequence is untranslatable (because escapeChar is followed by some character c     * not in metachars), then the escapeChar is kept if keepUntranslatedEscapes is true,     * otherwise the escapeChar is deleted. 
(The character c is always kept.)     *     */    public static String unescape (String subject, char escapeChar, String metachars, String chars, boolean keepUntranslatedEscapes) {        StringBuffer output = new StringBuffer ();        int p = 0;        int i;        int len = subject.length ();        while ((i = subject.indexOf (escapeChar, p)) != -1) {            output.append (subject.substring (p, i));            if (i + 1 == len)                break;            char metac = subject.charAt (i+1);  // metachar to replace            int k = metachars.indexOf (metac);            if (k == -1) {                // untranslatable sequence                if (keepUntranslatedEscapes)                    output.append (escapeChar);                output.append (metac);            }            else                output.append (chars.charAt (k));   // its corresponding true char            p = i + 2;    // skip over both escapeChar & metac        }        if (p < len)            output.append (subject.substring(p));        return output.toString ();    }    /**     * Parse a number from a string. Finds the first recognizable base-10 number (integer or floating point)     * in the string and returns it as a Number.  Uses American English conventions     * (i.e., '.' as decimal point and ',' as thousands separator).     
* @param string String to parse     * @return first recognizable number     * @exception NumberFormatException if no recognizable number is found     */    private static final int INT = 0;    private static final int FRAC = 1;    private static final int EXP = 2;    public static Number parseNumber (String s) throws NumberFormatException {        int p = 0;        for (int i=0; i<s.length(); ++i) {            char c = s.charAt (i);            if (Character.isDigit (c)) {                int start = i;                int end = ++i;                int state = INT;                if (start > 0 && s.charAt (start-1) == '.') {                    --start;                    state = FRAC;                }                if (start > 0 && s.charAt (start-1) == '-')                    --start;              foundEnd:                while (i < s.length()) {                    switch (s.charAt (i)) {                      case '0': case '1': case '2': case '3': case '4':                      case '5': case '6': case '7': case '8': case '9':                        end = ++i;                        break;                      case '.':                        if (state != INT)                            break foundEnd;                        state = FRAC;                        ++i;                        break;                      case ',': // ignore commas                        ++i;                        break;                      case 'e':                      case 'E':                        state = EXP;                        ++i;                        if (i < s.length() &&                            ( (c = s.charAt (i)) == '+' || c == '-') )                          ++i;                        break;                      default:                        break foundEnd;                    }                }                String num = s.substring (start, end);                num = replace (num, ",", "");                try {                    if (state == INT)             
           return new Integer (num);                    else                        return new Float (num);                } catch (NumberFormatException e) {                    throw new RuntimeException ("internal error: " + e);                }            }        }        throw new NumberFormatException (s);    }/*    For testing parseNumber    public static void main (String[] args) {      for (int i=0; i<args.length; ++i)          System.out.println (parseNumber (args[i]));  }*/    /**     * Generate a string by concatenating n copies of another string.     * @param s String to repeat     * @param n number of times to repeat s     * @return s concatenated with itself n times     */    public static String repeat (String s, int n) {        StringBuffer out = new StringBuffer ();        while (--n >= 0)            out.append (s);        return out.toString ();    }    /**     * Compress whitespace.     * @param s String to compress     * @return string with leading and trailing whitespace removed, and     * internal runs of whitespace replaced by a single space character
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -