📄 stringutils.java

📁 这是学习Java必须读懂两套源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
        String [] words = new String[tokens.countTokens()];
        for (int i=0; i<words.length; i++) {
            words[i] = tokens.nextToken().toLowerCase();
        }
        return words;
    }

    /**
     * A list of some of the most common words. For searching and indexing, we
     * often want to filter out these words since they just confuse searches.
     * The list was not created scientifically so may be incomplete :)
     */
    private static final String [] commonWords =  new String [] {
        "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so",
        "the", "to"
    };

    /**
     * Lookup table holding every entry of <code>commonWords</code> as a key.
     * It is built lazily by removeCommonWords(). The field is volatile because
     * it is read outside of the initialization lock, and it is only ever
     * assigned a fully populated map.
     */
    private static volatile Map commonWordsMap = null;

    /**
     * Returns a new String array with some of the most common English words
     * removed. The specific words removed are: a, and, as, at, be, do, i, if,
     * in, is, it, so, the, to<p>
     *
     * The comparison is an exact (case-sensitive) match against the lower
     * case entries listed above. The relative order of the remaining words is
     * preserved.
     *
     * @param words the words to filter; may be null.
     * @return a new array without the common words, or null if
     *      <code>words</code> was null.
     */
    public static final String [] removeCommonWords(String [] words) {
        //Pass null straight through, like the other helpers in this class do.
        if (words == null) {
            return null;
        }
        //See if common words map has been initialized. We don't statically
        //initialize it to save some memory. Even though this is a small
        //savings, it adds up with hundreds of classes being loaded.
        Map commonMap = commonWordsMap;
        if (commonMap == null) {
            synchronized(initLock) {
                commonMap = commonWordsMap;
                if (commonMap == null) {
                    commonMap = new HashMap();
                    for (int i=0; i<commonWords.length; i++) {
                        commonMap.put(commonWords[i], commonWords[i]);
                    }
                    //Publish the map only once it is completely filled, so
                    //that a thread skipping the lock never sees a partially
                    //built table.
                    commonWordsMap = commonMap;
                }
            }
        }
        //Now, add all words that aren't in the common map to results
        ArrayList results = new ArrayList(words.length);
        for (int i=0; i<words.length; i++) {
            if (!commonMap.containsKey(words[i])) {
                results.add(words[i]);
            }
        }
        return (String[])results.toArray(new String[results.size()]);
    }

    /**
     * Pseudo-random number generator object for use with randomString().
     * The Random class is not considered to be cryptographically secure, so
     * only use these random Strings for low to medium security applications.
     */
    private static volatile Random randGen = null;

    /**
     * Array of numbers and letters of mixed case. Numbers appear in the list
     * twice so that there is a more equal chance that a number will be picked.
     * We can use the array to get a random number or letter by picking a random
     * array index. The array is created lazily by randomString() and is always
     * assigned before <code>randGen</code>.
     */
    private static char[] numbersAndLetters = null;

    /**
     * Returns a random String of numbers and letters of the specified length.
     * The method uses the Random class that is built-in to Java which is
     * suitable for low to medium grade security uses. This means that the
     * output is only pseudo random, i.e., each number is mathematically
     * generated so is not truly random.<p>
     *
     * Every character is drawn uniformly from a table of 72 entries: the ten
     * digits listed twice (20 entries), the 26 lower case letters and the 26
     * upper case letters. A letter is therefore equally likely to be lower or
     * upper case, and a digit is picked in 20 out of 72 draws.<p>
     *
     * The specified length must be at least one. If not, the method will return
     * null.
     *
     * @param length the desired length of the random String to return.
     * @return a random String of numbers and letters of the specified length,
     *      or null if <code>length</code> is less than one.
     */
    public static final String randomString(int length) {
        if (length < 1) {
            return null;
        }
        //Init of pseudo random number generator.
        if (randGen == null) {
            synchronized (initLock) {
                if (randGen == null) {
                    //Fill the character table first. randGen doubles as the
                    //"initialized" flag, so it must be assigned last; any
                    //thread that sees it non-null then also sees the table.
                    numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
                    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
                    randGen = new Random();
                }
            }
        }
        Random random = randGen;
        char [] alphabet = numbersAndLetters;
        //Create a char buffer to put random letters and numbers in.
        char [] randBuffer = new char[length];
        for (int i=0; i<randBuffer.length; i++) {
            //Bound by the table size so that every entry, including the last
            //one, can be drawn.
            randBuffer[i] = alphabet[random.nextInt(alphabet.length)];
        }
        return new String(randBuffer);
    }

   /**
    * Intelligently chops a String at a word boundary (whitespace) that occurs
    * at the specified index in the argument or before. However, if there is a
    * newline character before <code>length</code>, the String will be chopped
    * there. If no newline or whitespace is found in <code>string</code> up to
    * the index <code>length</code>, the String will be chopped at <code>length</code>.
    * <p>
    * For example, chopAtWord("This is a nice String", 10) will return
    * "This is a" which is the first word boundary less than or equal to 10
    * characters into the original String.
    *
    * @param string the String to chop.
    * @param length the index in <code>string</code> to start looking for a
    *       whitespace boundary at.
    * @return a substring of <code>string</code> whose length is less than or
    *       equal to <code>length</code>, and that is chopped at whitespace.
    */
    public static final String chopAtWord(String string, int length) {
        //Null and empty Strings have nothing to chop; hand them straight back.
        if (string == null || string.length() == 0) {
            return string;
        }
        //A chop point of zero (or less) leaves no room for any character.
        if (length <= 0) {
            return "";
        }

        //Only the first <limit> characters can be part of the result.
        int limit = Math.min(length, string.length());

        //First check if there is a newline character before the chop point;
        //if so, chop there. For a Windows line break ("\r\n") the carriage
        //return is dropped as well.
        int newline = string.indexOf('\n');
        if (newline >= 0 && newline < limit) {
            if (newline > 0 && string.charAt(newline-1) == '\r') {
                return string.substring(0, newline-1);
            }
            return string.substring(0, newline);
        }

        //Done checking for newline, now see if the whole String already fits
        //within the specified chop point. A String of exactly <length>
        //characters fits and must not lose its last word.
        if (string.length() <= length) {
            return string;
        }

        //No newline, so chop at the last space at or before the chop point.
        //A space at index 0 is not treated as a boundary.
        for (int i = length-1; i > 0; i--) {
            if (string.charAt(i) == ' ') {
                return string.substring(0, i).trim();
            }
        }

        //Did not find word boundary so return original String chopped at
        //specified length.
        return string.substring(0, length);
    }

    /**
     * Highlights words in a string. Word matching ignores case. The actual
     * highlighting method is specified with the start and end highlight tags.
     * Those might be beginning and ending HTML bold tags, or anything else.
     *
     * @param string the String to highlight words in.
     * @param words an array of words that should be highlighted in the string.
     * @param startHighlight the tag that should be inserted to start highlighting.
     * @param endHighlight the tag that should be inserted to end highlighting.
     * @return a new String with the specified words highlighted.
     */
    public static final String highlightWords(String string, String[] words,
        String startHighlight, String endHighlight)
    {
        if (string == null || words == null ||
                startHighlight == null || endHighlight == null)
        {
            return null;
        }

        //Walk the original text exactly once. Because the text that has
        //already been written to the buffer is never scanned again, the
        //highlight tags themselves can not be matched by a later word, and
        //no character of the input is ever emitted twice.
        int length = string.length();
        StringBuffer buf = new StringBuffer(length);
        int pos = 0;
        while (pos < length) {
            int matched = wholeWordMatchLength(string, pos, words);
            if (matched > 0) {
                //Wrap the text as it appears in the input (original case).
                buf.append(startHighlight);
                buf.append(string.substring(pos, pos + matched));
                buf.append(endHighlight);
                pos += matched;
            }
            else {
                buf.append(string.charAt(pos));
                pos++;
            }
        }
        return buf.toString();
    }

    /**
     * Returns the length of the first entry of <code>words</code> that occurs
     * in <code>string</code> at index <code>pos</code> as a distinct word, or
     * 0 if there is none. An occurrence counts as a distinct word when the
     * character before it and the character after it are not letters; the
     * beginning and the end of the String are boundaries too. The comparison
     * ignores case. Null and empty entries of <code>words</code> are skipped.
     */
    private static int wholeWordMatchLength(String string, int pos,
        String[] words)
    {
        //A word can not start right after a letter -- that would make it the
        //tail of a larger word.
        if (pos > 0 && Character.isLetter(string.charAt(pos-1))) {
            return 0;
        }
        for (int x=0; x<words.length; x++) {
            String word = words[x];
            if (word == null || word.length() == 0) {
                continue;
            }
            int end = pos + word.length();
            if (end > string.length()) {
                continue;
            }
            //A letter right behind the match makes it the head of a larger
            //word.
            if (end < string.length() && Character.isLetter(string.charAt(end))) {
                continue;
            }
            //Compare in place instead of lower casing the whole text, so the
            //indexes always refer to the original String.
            if (string.regionMatches(true, pos, word, 0, word.length())) {
                return word.length();
            }
        }
        return 0;
    }

    /**
     * Escapes all necessary characters in the String so that it can be used
     * in an XML doc.
     *
     * @param string the string to escape.
     * @return the string with appropriate characters escaped.
     */
    public static final String escapeForXML(String string) {
        //Check if the string is null or zero length -- if so, return
        //what was sent in.
        if (string == null || string.length() == 0 ) {
            return string;
        }
        char [] sArray = string.toCharArray();
        StringBuffer buf = new StringBuffer(sArray.length);
        for (int i=0; i<sArray.length; i++) {
            char ch = sArray[i];
            switch (ch) {
                case '<':
                    buf.append("&lt;");
                    break;
                case '>':
                    //Must be escaped as well: the sequence "]]>" is not
                    //allowed to appear literally in XML character data.
                    buf.append("&gt;");
                    break;
                case '&':
                    buf.append("&amp;");
                    break;
                case '"':
                    buf.append("&quot;");
                    break;
                default:
                    //Everything else is copied unchanged. Note that this
                    //includes the apostrophe, so inside attributes the result
                    //is only safe when the value is delimited by double
                    //quotes.
                    buf.append(ch);
                    break;
            }
        }
        return buf.toString();
    }

}
上一页 12
💿 文件大小 1840 K
👤 上传用户 pangbo888
📂 所属分类 J2ME
🏷️ 相关标签

#Java #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -