📄 stringutils.java

📁 Jive 是一个系统工程
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
     * A list of some of the most common words. For searching and indexing, we
     * often want to filter out these words since they just confuse searches.
     * The list was not created scientifically so may be incomplete :)
     */
    private static final String [] commonWords =  new String [] {
        "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so",
        "the", "to"
    };

    /**
     * Lookup table holding every entry of <code>commonWords</code> as a key.
     * Built on first use by removeCommonWords(). The field is volatile and is
     * only assigned once the table is completely filled, so a reader that
     * skips the lock never observes a partially built map.
     */
    private static volatile Map commonWordsMap = null;

    /**
     * Returns a new String array with some of the most common English words
     * removed. The specific words removed are: a, and, as, at, be, do, i, if,
     * in, is, it, so, the, to. Matching is exact (case-sensitive), so only the
     * lower case forms listed above are removed.
     *
     * @param words the words to filter; must not be <code>null</code>.
     * @return a new array holding, in their original order, the entries of
     *      <code>words</code> that are not common words.
     */
    public static final String [] removeCommonWords(String [] words) {
        //See if common words map has been initialized. We don't statically
        //initialize it to save some memory. Even though this a small savings,
        //it adds up with hundreds of classes being loaded.
        Map commonMap = commonWordsMap;
        if (commonMap == null) {
            synchronized(initLock) {
                commonMap = commonWordsMap;
                if (commonMap == null) {
                    //Fill a local map first and publish it last so that other
                    //threads can never read the map while it is being built.
                    commonMap = new HashMap();
                    for (int i=0; i<commonWords.length; i++) {
                        commonMap.put(commonWords[i], commonWords[i]);
                    }
                    commonWordsMap = commonMap;
                }
            }
        }
        //Now, add all words that aren't in the common map to results
        ArrayList results = new ArrayList(words.length);
        for (int i=0; i<words.length; i++) {
            if (!commonMap.containsKey(words[i])) {
                results.add(words[i]);
            }
        }
        return (String[])results.toArray(new String[results.size()]);
    }

    /**
     * Pseudo-random number generator object for use with randomString().
     * The Random class is not considered to be cryptographically secure, so
     * only use these random Strings for low to medium security applications.
     * Assigned after numbersAndLetters, so a non-null value implies that the
     * character table is ready as well.
     */
    private static volatile Random randGen = null;

    /**
     * Array of numbers and letters of mixed case. Numbers appear in the list
     * twice so that there is a more equal chance that a number will be picked.
     * We can use the array to get a random number or letter by picking a random
     * array index.
     */
    private static volatile char[] numbersAndLetters = null;

    /**
     * Returns a random String of numbers and letters of the specified length.
     * The method uses the Random class that is built-in to Java which is
     * suitable for low to medium grade security uses. This means that the
     * output is only pseudo random, i.e., each number is mathematically
     * generated so is not truly random.<p>
     *
     * Every character is drawn uniformly from a table of 72 entries: the ten
     * digits (listed twice), the 26 lower case letters and the 26 upper case
     * letters. A character is therefore a digit with probability 20/72 and,
     * if it is a letter, equally likely to be lower or upper case.<p>
     *
     * The specified length must be at least one. If not, the method will return
     * null.
     *
     * @param length the desired length of the random String to return.
     * @return a random String of numbers and letters of the specified length,
     *      or <code>null</code> if <code>length</code> is less than one.
     */
    public static final String randomString(int length) {
        if (length < 1) {
            return null;
        }
        //Init of pseudo random number generator.
        if (randGen == null) {
            synchronized (initLock) {
                if (randGen == null) {
                    //Initialize the numbersAndLetters array first: randGen is
                    //the flag tested without the lock, so it must be set last.
                    numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
                    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
                    randGen = new Random();
                }
            }
        }
        char [] alphabet = numbersAndLetters;
        Random generator = randGen;
        //Create a char buffer to put random letters and numbers in.
        char [] randBuffer = new char[length];
        for (int i=0; i<randBuffer.length; i++) {
            //nextInt's bound is exclusive, so use the full table length;
            //a smaller bound would make the last entry ('Z') unreachable.
            randBuffer[i] = alphabet[generator.nextInt(alphabet.length)];
        }
        return new String(randBuffer);
    }

    /**
     * Intelligently chops a String at a word boundary (whitespace) that occurs
     * at the specified index in the argument or before. However, if there is a
     * newline character before <code>length</code>, the String will be chopped
     * there. If no newline or whitespace is found in <code>string</code> up to
     * the index <code>length</code>, the String will chopped at <code>length</code>.
     * A String that has no newline in range and is not longer than
     * <code>length</code> is returned unchanged.
     * <p>
     * For example, chopAtWord("This is a nice String", 10) will return
     * "This is a" which is the first word boundary less than or equal to 10
     * characters into the original String.
     *
     * @param string the String to chop.
     * @param length the index in <code>string</code> to start looking for a
     *       whitespace boundary at.
     * @return a substring of <code>string</code> whose length is less than or
     *       equal to <code>length</code>, and that is chopped at whitespace;
     *       <code>string</code> itself if it is <code>null</code> or empty;
     *       the empty String if <code>length</code> is not positive.
     */
    public static final String chopAtWord(String string, int length) {
        if (string == null || string.length() == 0) {
            return string;
        }
        //Nothing can be kept; this also protects the array accesses below
        //from a negative index.
        if (length <= 0) {
            return "";
        }
        char [] charArray = string.toCharArray();
        //Number of leading characters that are candidates for the result.
        int sLength = Math.min(string.length(), length);
        //First check if there is a newline character before length; if so,
        //chop word there.
        for (int i=0; i<sLength-1; i++) {
            //Windows
            if (charArray[i] == '\r' && charArray[i+1] == '\n') {
                return string.substring(0, i);
            }
            //Unix
            else if (charArray[i] == '\n') {
                return string.substring(0, i);
            }
        }
        //Also check boundary case of Unix newline
        if (charArray[sLength-1] == '\n') {
            return string.substring(0, sLength-1);
        }
        //Done checking for newline, now see if the total string already fits
        //within the specified chop point.
        if (string.length() <= length) {
            return string;
        }
        //No newline, so chop at the last whitespace before the chop point.
        for (int i = length-1; i > 0; i--) {
            if (charArray[i] == ' ') {
                return string.substring(0, i).trim();
            }
        }
        //Did not find word boundary so return original String chopped at
        //specified length.
        return string.substring(0, length);
    }

    /**
     * Highlights words in a string. Word matching ignores case. The actual
     * highlighting method is specified with the start and end highlight tags.
     * Those might be beginning and ending HTML bold tags, or anything else.
     * <p>
     * Only distinct words are highlighted: an occurrence counts when it is not
     * directly preceded or followed by a letter (the start and the end of the
     * string count as boundaries). All words are located in the original
     * text, so text of the inserted tags is never itself highlighted, and
     * matches that overlap or touch are wrapped in a single pair of tags.
     * <code>null</code> and empty entries of <code>words</code> are ignored.
     *
     * @param string the String to highlight words in.
     * @param words an array of words that should be highlighted in the string.
     * @param startHighlight the tag that should be inserted to start highlighting.
     * @param endHighlight the tag that should be inserted to end highlighting.
     * @return a new String with the specified words highlighted, the original
     *      String if nothing matched, or <code>null</code> if any argument
     *      is <code>null</code>.
     */
    public static final String highlightWords(String string, String[] words,
        String startHighlight, String endHighlight)
    {
        if (string == null || words == null ||
                startHighlight == null || endHighlight == null)
        {
            return null;
        }
        int length = string.length();
        //highlighted[k] is true when character k belongs to a matched word.
        boolean [] highlighted = new boolean[length];
        boolean anyMatch = false;
        //Iterate through each word and mark its whole-word occurrences.
        for (int x=0; x<words.length; x++) {
            String word = words[x];
            if (word == null || word.length() == 0) {
                continue;
            }
            int wLength = word.length();
            for (int i=0; i+wLength<=length; i++) {
                //regionMatches compares character by character, so indexes
                //always refer to the original text and no locale is involved.
                if (!string.regionMatches(true, i, word, 0, wLength)) {
                    continue;
                }
                int end = i + wLength;
                boolean startBoundary =
                        (i == 0) || !Character.isLetter(string.charAt(i-1));
                boolean endBoundary =
                        (end == length) || !Character.isLetter(string.charAt(end));
                if (startBoundary && endBoundary) {
                    for (int k=i; k<end; k++) {
                        highlighted[k] = true;
                    }
                    anyMatch = true;
                }
            }
        }
        if (!anyMatch) {
            return string;
        }
        //Copy the text, inserting a tag wherever the marking changes.
        StringBuffer buf = new StringBuffer(
                length + startHighlight.length() + endHighlight.length());
        boolean inside = false;
        for (int i=0; i<length; i++) {
            if (highlighted[i] != inside) {
                buf.append(inside ? endHighlight : startHighlight);
                inside = !inside;
            }
            buf.append(string.charAt(i));
        }
        if (inside) {
            buf.append(endHighlight);
        }
        return buf.toString();
    }

    /**
     * Escapes all necessary characters in the String so that it can be used
     * in an XML doc. The characters replaced are &lt;, &gt;, &quot; and &amp;;
     * every other character is copied unchanged.
     *
     * @param string the string to escape.
     * @return the string with appropriate characters escaped, or the argument
     *      itself if it is <code>null</code> or empty.
     */
    public static final String escapeForXML(String string) {
        //Check if the string is null or zero length -- if so, return
        //what was sent in.
        if (string == null || string.length() == 0 ) {
            return string;
        }
        char [] sArray = string.toCharArray();
        StringBuffer buf = new StringBuffer(sArray.length);
        for (int i=0; i<sArray.length; i++) {
            char ch = sArray[i];
            switch (ch) {
                case '<':
                    buf.append("&lt;");
                    break;
                case '>':
                    buf.append("&gt;");
                    break;
                case '"':
                    buf.append("&quot;");
                    break;
                case '&':
                    buf.append("&amp;");
                    break;
                default:
                    buf.append(ch);
                    break;
            }
        }
        return buf.toString();
    }

    /**
     * Unescapes the String by converting XML escape sequences back into normal
     * characters. The five predefined XML entities are handled: &amp;lt;,
     * &amp;gt;, &amp;quot;, &amp;apos; and &amp;amp;.
     *
     * @param string the string to unescape.
     * @return the string with appropriate characters unescaped.
     */
    public static final String unescapeFromXML(String string) {
        string = replace(string, "&lt;", "<");
        string = replace(string, "&gt;", ">");
        string = replace(string, "&quot;", "\"");
        string = replace(string, "&apos;", "'");
        //&amp; must be handled last so that an escaped ampersand followed by
        //entity-like text (e.g. "&amp;lt;") is decoded only once.
        return replace(string, "&amp;", "&");
    }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -