stringutils.java.svn-base

来自「开源项目openfire的完整源程序」· SVN-BASE 代码 · 共 2,677 行 · 第 1/5 页

SVN-BASE
2,677
字号
     * <p/>
     * Copyright (c) 2002-2003 The Apache Software Foundation.  All rights
     * <p/>
     * reserved.
     * <p/>
     * <p/>
     * <p/>
     * Redistribution and use in source and binary forms, with or without
     * <p/>
     * modification, are permitted provided that the following conditions
     * <p/>
     * are met:
     * <p/>
     * <p/>
     * <p/>
     * 1. Redistributions of source code must retain the above copyright
     * <p/>
     * notice, this list of conditions and the following disclaimer.
     * <p/>
     * <p/>
     * <p/>
     * 2. Redistributions in binary form must reproduce the above copyright
     * <p/>
     * notice, this list of conditions and the following disclaimer in
     * <p/>
     * the documentation and/or other materials provided with the
     * <p/>
     * distribution.
     * <p/>
     * <p/>
     * <p/>
     * 3. The end-user documentation included with the redistribution, if
     * <p/>
     * any, must include the following acknowlegement:
     * <p/>
     * "This product includes software developed by the
     * <p/>
     * Apache Software Foundation (http://www.apache.org/)."
     * <p/>
     * Alternately, this acknowlegement may appear in the software itself,
     * <p/>
     * if and wherever such third-party acknowlegements normally appear.
     * <p/>
     * <p/>
     * <p/>
     * 4. The names "The Jakarta Project", "Commons", and "Apache Software
     * <p/>
     * Foundation" must not be used to endorse or promote products derived
     * <p/>
     * from this software without prior written permission. For written
     * <p/>
     * permission, please contact apache@apache.org.
     * <p/>
     * <p/>
     * <p/>
     * 5. Products derived from this software may not be called "Apache"
     * <p/>
     * nor may "Apache" appear in their names without prior written
     * <p/>
     * permission of the Apache Group.
     * <p/>
     * <p/>
     * <p/>
     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
     * <p/>
     * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     * <p/>
     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     * <p/>
     * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
     * <p/>
     * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     * <p/>
     * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     * <p/>
     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
     * <p/>
     * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     * <p/>
     * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     * <p/>
     * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     * <p/>
     * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     * <p/>
     * SUCH DAMAGE.
     * <p/>
     * ====================================================================
     * <p/>
     * <p/>
     * <p/>
     * This software consists of voluntary contributions made by many
     * <p/>
     * individuals on behalf of the Apache Software Foundation.  For more
     * <p/>
     * information on the Apache Software Foundation, please see
     * <p/>
     * <http://www.apache.org/>.
     */

    /**
     * Octets that {@link #URLEncode(String, String)} copies to its output
     * unescaped: the ASCII digits, the ASCII letters and the marks
     * <code>- _ . ! ~ * ' ( )</code> (the "unreserved" set of RFC 2396).
     * Every other octet is percent-escaped.
     */
    private static final BitSet allowed_query = new BitSet(256);

    static {
        for (int i = '0'; i <= '9'; i++) {
            allowed_query.set(i);
        }
        for (int i = 'a'; i <= 'z'; i++) {
            allowed_query.set(i);
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            allowed_query.set(i);
        }

        allowed_query.set('-');
        allowed_query.set('_');
        allowed_query.set('.');
        allowed_query.set('!');
        allowed_query.set('~');
        allowed_query.set('*');
        allowed_query.set('\'');
        allowed_query.set('(');
        allowed_query.set(')');
    }

    /**
     * Percent-encodes a string for use in a URI. The string is first converted
     * to bytes in the given charset; each byte that is an ASCII letter, an
     * ASCII digit or one of <code>- _ . ! ~ * ' ( )</code> is copied as is, and
     * every other byte (including the space character) is written as
     * <code>%XX</code> with upper-case hexadecimal digits.
     * <p/>
     * This is a replacement for java.net.URLEncoder#encode(String, String),
     * which is broken under JDK 1.3.
     *
     * @param original the original character sequence, may be <code>null</code>
     * @param charset  the name of the charset used to turn the characters into bytes
     * @return the encoded character sequence, or <code>null</code> if
     *         <code>original</code> is <code>null</code>
     * @throws UnsupportedEncodingException if the named charset is not supported;
     *                                      the exception message names the charset
     */
    public static String URLEncode(String original, String charset)
            throws UnsupportedEncodingException
    {
        if (original == null) {
            return null;
        }

        // Let the exception raised by getBytes propagate untouched: it already
        // carries the name of the offending charset, which a re-created,
        // message-less exception would lose.
        byte[] octets = original.getBytes(charset);

        StringBuffer buf = new StringBuffer(octets.length);
        for (int i = 0; i < octets.length; i++) {
            // Work with the unsigned value (0..255) so that bytes >= 0x80 are
            // looked up and formatted without sign-extension surprises.
            int octet = octets[i] & 0xFF;
            if (allowed_query.get(octet)) {
                buf.append((char) octet);
            }
            else {
                buf.append('%');
                buf.append(Character.toUpperCase(Character.forDigit(octet >> 4, 16)));  // high nibble
                buf.append(Character.toUpperCase(Character.forDigit(octet & 0xF, 16))); // low nibble
            }
        }

        return buf.toString();
    }


    /**
     * The '=' character, stored as an int.
     * NOTE(review): presumably the padding character of the Base64 routines
     * elsewhere in this file - confirm against its users.
     */
    private static final int fillchar = '=';

    /**
     * A 64-character lookup table: 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/', in
     * that order (the same order as the standard Base64 alphabet).
     * NOTE(review): presumably indexed by 6-bit values in the Base64 routines
     * elsewhere in this file - confirm against its users.
     */
    private static final String cvt =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";


    /**
     * Converts a line of text into an array of lower case words using a
     * word instance of BreakIterator (BreakIterator.getWordInstance()).
     * <p/>
     * Each segment reported by the iterator is trimmed, the characters
     * <code>+ / \ # * ) ( &amp;</code> are removed from it, and the remainder
     * is converted to lower case. Segments that end up empty (for example
     * whitespace between words) are dropped.
     * <p/>
     * This method is under the Jive Open Source Software License and was
     * written by Mark Imbriaco.
     *
     * @param text a String of text to convert into an array of words
     * @return text broken up into an array of lower case words; an empty array
     *         if <code>text</code> is <code>null</code> or empty.
     */
    public static final String [] toLowerCaseWordArray(String text) {
        if (text == null || text.length() == 0) {
            return new String[0];
        }

        // Characters that are stripped from every word.
        final String unwanted = "+/\\#*)(&";

        ArrayList wordList = new ArrayList();
        BreakIterator boundary = BreakIterator.getWordInstance();
        boundary.setText(text);
        int start = 0;

        for (int end = boundary.next(); end != BreakIterator.DONE;
             start = end, end = boundary.next())
        {
            String segment = text.substring(start, end).trim();

            // Drop the unwanted characters in a single pass.
            StringBuffer word = new StringBuffer(segment.length());
            for (int i = 0; i < segment.length(); i++) {
                char c = segment.charAt(i);
                if (unwanted.indexOf(c) < 0) {
                    word.append(c);
                }
            }

            if (word.length() > 0) {
                // The method name and contract promise lower case words. A fixed
                // locale keeps the result independent of the platform default
                // (e.g. the Turkish dotless i).
                wordList.add(word.toString().toLowerCase(java.util.Locale.ENGLISH));
            }
        }

        return (String[])wordList.toArray(new String[wordList.size()]);
    }


    /**
     * Pseudo-random number generator object for use with randomString().
     * <p/>
     * The Random class is not considered to be cryptographically secure, so
     * only use these random Strings for low to medium security applications.
     */
    private static Random randGen = new Random();

    /**
     * Array of numbers and letters of mixed case. Numbers appear in the list
     * twice so that there is a more equal chance that a number will be picked.
     * We can use the array to get a random number or letter by picking a random
     * array index.
     */
    private static char[] numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
            "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

    /**
     * Returns a random String of numbers and letters (lower and upper case)
     * of the specified length. The method uses the Random class that is
     * built-in to Java which is suitable for low to medium grade security uses.
     * This means that the output is only pseudo random, i.e., each number is
     * mathematically generated so is not truly random.
     * <p/>
     * The specified length must be at least one. If not, the method will return
     * null.
     *
     * @param length the desired length of the random String to return.
     * @return a random String of numbers and letters of the specified length,
     *         or <code>null</code> if <code>length</code> is less than one.
     */
    public static final String randomString(int length) {
        if (length < 1) {
            return null;
        }
        // Create a char buffer to put random letters and numbers in.
        char [] randBuffer = new char[length];
        for (int i = 0; i < randBuffer.length; i++) {
            // Bound the index by the array length (72) rather than a hard-coded
            // 71, so that the last entry ('Z') can be picked as well.
            randBuffer[i] = numbersAndLetters[randGen.nextInt(numbersAndLetters.length)];
        }
        return new String(randBuffer);
    }


    /**
     * Intelligently chops a String at a word boundary (whitespace) that occurs
     * <p/>
     * at the specified index in the argument or before. However, if there is a
     * <p/>
     * newline character before <code>length</code>, the String will be chopped
     * <p/>
     * there. If no newline or whitespace is found in <code>string</code> up to
     * <p/>
     * the index <code>length</code>, the String will be chopped at <code>length</code>.
     * <p/>
     * <p/>
     * <p/>
     * For example, chopAtWord("This is a nice String", 10, -1) will return
     * <p/>
     * "This is a" which is the first word boundary less than or equal to 10
     * <p/>
     * characters into the original String.
     *
     * @param string    the String to chop.
     * @param length    the index in <code>string</code> to start looking for a
     *                  <p/>
     *                  whitespace boundary at.
     * @param minLength the minimum length the word should be chopped at. This is helpful
     *                  <p/>
     *                  for words with no natural boundaries, ie: "thisisareallylonglonglongword".
     *                  <p/>
     *                  This must be smaller than length and can be -1 if no minLength is wanted
     * @return a substring of <code>string</code> whose length is less than or
     *         <p/>
     *         equal to <code>length</code>, and that is chopped at whitespace.
     */

    public static final String chopAtWord(String string, int length, int minLength) {

        // guard clauses

        if (length < 2) {

            throw new IllegalArgumentException("Length specified (" + length + ") must be > 2");

        }

        else if (minLength >= length) {

            throw new IllegalArgumentException("minLength must be smaller than length");

        }


        int sLength = (string == null) ? -1 : string.length();

        // shortcircuit clauses

        if (sLength < 1) {

            return string;

        }

        // minLength specified, string is smaller than the minLength, return the string

        else if (minLength != -1 && sLength < minLength) {

            return string;

        }

        // no minLength specified, string is smaller than length

        else if (minLength == -1 && sLength < length) {

            return string;

        }


        char [] charArray = string.toCharArray();

        // String is longer than the length specified, attempt to find a newline

        // or a space

        if (sLength > length) {

            sLength = length;

            // First check if there is a newline character before length; if so,

            // chop word there.

            for (int i = 0; i < sLength - 1; i++) {

                // Windows

                if (charArray[i] == '\r' && charArray[i + 1] == '\n') {

                    return string.substring(0, i + 1);

                }

                // Unix

                else if (charArray[i] == '\n') {

                    return string.substring(0, i);

                }

            }

            // Also check boundary case of Unix newline

            if (charArray[sLength - 1] == '\n') {

                return string.substring(0, sLength - 1);

            }

            // No newline, so chop at the first whitespace.

            for (int i = sLength - 1; i > 0; i--) {

                if (charArray[i] == ' ') {

                    return string.substring(0, i).trim();

                }

            }

        }

        // String is shorter than length but longer than minLength,

        // make sure there is a space in the string before minLength

        else if (minLength != -1 && sLength > minLength) {

            for (int i = 0; i < minLength; i++) {

                if (charArray[i] == ' ') {

                    return string;

                }

            }

        }

        // Did not find a word boundary, so return a string at the min length, if a min

        // length was specified:

        if (minLength > -1 && minLength <= string.length()) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?