📄 stringutils.java
字号:
* A list of some of the most common words. For searching and indexing, we * often want to filter out these words since they just confuse searches. * The list was not created scientifically so may be incomplete :) */ private static final String [] commonWords = new String [] { "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so", "the", "to" }; private static Map commonWordsMap = null; /** * Returns a new String array with some of the most common English words * removed. The specific words removed are: a, and, as, at, be, do, i, if, * in, is, it, so, the, to */ public static final String [] removeCommonWords(String [] words) { //See if common words map has been initialized. We don't statically //initialize it to save some memory. Even though this a small savings, //it adds up with hundreds of classes being loaded. if (commonWordsMap == null) { synchronized(initLock) { if (commonWordsMap == null) { commonWordsMap = new HashMap(); for (int i=0; i<commonWords.length; i++) { commonWordsMap.put(commonWords[i], commonWords[i]); } } } } //Now, add all words that aren't in the common map to results ArrayList results = new ArrayList(words.length); for (int i=0; i<words.length; i++) { if (!commonWordsMap.containsKey(words[i])) { results.add(words[i]); } } return (String[])results.toArray(new String[results.size()]); } /** * Pseudo-random number generator object for use with randomString(). * The Random class is not considered to be cryptographically secure, so * only use these random Strings for low to medium security applications. */ private static Random randGen = null; /** * Array of numbers and letters of mixed case. Numbers appear in the list * twice so that there is a more equal chance that a number will be picked. * We can use the array to get a random number or letter by picking a random * array index. */ private static char[] numbersAndLetters = null; /** * Returns a random String of numbers and letters of the specified length. 
* The method uses the Random class that is built-in to Java which is * suitable for low to medium grade security uses. This means that the * output is only pseudo random, i.e., each number is mathematically * generated so is not truly random.<p> * * For every character in the returned String, there is an equal chance that * it will be a letter or number. If a letter, there is an equal chance * that it will be lower or upper case.<p> * * The specified length must be at least one. If not, the method will return * null. * * @param length the desired length of the random String to return. * @return a random String of numbers and letters of the specified length. */ public static final String randomString(int length) { if (length < 1) { return null; } //Init of pseudo random number generator. if (randGen == null) { synchronized (initLock) { if (randGen == null) { randGen = new Random(); //Also initialize the numbersAndLetters array numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray(); } } } //Create a char buffer to put random letters and numbers in. char [] randBuffer = new char[length]; for (int i=0; i<randBuffer.length; i++) { randBuffer[i] = numbersAndLetters[randGen.nextInt(71)]; } return new String(randBuffer); } /** * Intelligently chops a String at a word boundary (whitespace) that occurs * at the specified index in the argument or before. However, if there is a * newline character before <code>length</code>, the String will be chopped * there. If no newline or whitespace is found in <code>string</code> up to * the index <code>length</code>, the String will chopped at <code>length</code>. * <p> * For example, chopAtWord("This is a nice String", 10) will return * "This is a" which is the first word boundary less than or equal to 10 * characters into the original String. * * @param string the String to chop. 
* @param length the index in <code>string</code> to start looking for a * whitespace boundary at. * @return a substring of <code>string</code> whose length is less than or * equal to <code>length</code>, and that is chopped at whitespace. */ public static final String chopAtWord(String string, int length) { if (string == null) { return string; } char [] charArray = string.toCharArray(); int sLength = string.length(); if (length < sLength) { sLength = length; } //First check if there is a newline character before length; if so, //chop word there. for (int i=0; i<sLength-1; i++) { //Windows if (charArray[i] == '\r' && charArray[i+1] == '\n') { return string.substring(0, i); } //Unix else if (charArray[i] == '\n') { return string.substring(0, i); } } //Also check boundary case of Unix newline if (charArray[sLength-1] == '\n') { return string.substring(0, sLength-1); } //Done checking for newline, now see if the total string is less than //the specified chop point. if (string.length() < length) { return string; } //No newline, so chop at the first whitespace. for (int i = length-1; i > 0; i--) { if (charArray[i] == ' ') { return string.substring(0, i).trim(); } } //Did not find word boundary so return original String chopped at //specified length. return string.substring(0, length); } /** * Highlights words in a string. Words matching ignores case. The actual * higlighting method is specified with the start and end higlight tags. * Those might be beginning and ending HTML bold tags, or anything else. * * @param string the String to highlight words in. * @param words an array of words that should be highlighted in the string. * @param startHighlight the tag that should be inserted to start highlighting. * @param endHighlight the tag that should be inserted to end highlighting. * @return a new String with the specified words highlighted. 
*/
    public static final String highlightWords(String string, String[] words,
        String startHighlight, String endHighlight)
    {
        if (string == null || words == null ||
            startHighlight == null || endHighlight == null)
        {
            return null;
        }
        //Iterate through each word. Every pass works on the output of the
        //previous pass, so text inserted by an earlier highlight is itself
        //searched by later words.
        for (int x=0; x<words.length; x++) {
            //A null or empty entry cannot name a word, so skip it.
            if (words[x] == null || words[x].length() == 0) {
                continue;
            }
            string = highlightWord(string, words[x].toLowerCase(),
                    startHighlight, endHighlight);
        }
        return string;
    }

    /**
     * Wraps every whole-word, case-insensitive occurrence of one word in the
     * given highlight tags. An occurrence counts as a whole word when the
     * characters directly before and after it are not letters, or when it
     * touches the start or end of the String. The text itself is copied
     * unchanged; only the tags are inserted.
     *
     * @param string the String to search; must not be null.
     * @param lcWord the word to highlight, already lower-cased and not empty.
     * @param startHighlight the tag inserted before each highlighted word.
     * @param endHighlight the tag inserted after each highlighted word.
     * @return <code>string</code> itself if the word does not occur, otherwise
     *      a new String with the tags inserted.
     */
    private static String highlightWord(String string, String lcWord,
        String startHighlight, String endHighlight)
    {
        //we want to ignore case.
        String lcString = string.toLowerCase();
        //Lower-casing can change the length of a few characters; the indexes
        //found in lcString would then not line up with string, so leave such
        //input untouched.
        if (lcString.length() != string.length()) {
            return string;
        }
        int match = lcString.indexOf(lcWord);
        if (match < 0) {
            return string;
        }
        //using a char [] is more efficient
        char [] chars = string.toCharArray();
        int wordLength = lcWord.length();
        StringBuffer buf = new StringBuffer(chars.length +
                startHighlight.length() + endHighlight.length());
        //Index of the first character not yet copied into buf.
        int copied = 0;
        while (match >= 0) {
            int end = match + wordLength;
            //we only want to highlight distinct words and not parts of
            //larger words.
            boolean startsWord = (match == 0)
                    || !Character.isLetter(chars[match-1]);
            boolean endsWord = (end >= chars.length)
                    || !Character.isLetter(chars[end]);
            boolean wholeWord = startsWord && endsWord;

            buf.append(chars, copied, match - copied);
            if (wholeWord) {
                buf.append(startHighlight);
            }
            buf.append(chars, match, wordLength);
            if (wholeWord) {
                buf.append(endHighlight);
            }
            copied = end;
            match = lcString.indexOf(lcWord, end);
        }
        buf.append(chars, copied, chars.length - copied);
        return buf.toString();
    }

    /**
     * Escapes all necessary characters in the String so that it can be used
     * in an XML doc. The less-than sign, greater-than sign, double quote and
     * ampersand are replaced by their predefined XML entities; every other
     * character is copied unchanged.
     *
     * @param string the string to escape.
     * @return the string with appropriate characters escaped; a null or
     *      empty argument is returned as is.
     */
    public static final String escapeForXML(String string) {
        //Check if the string is null or zero length -- if so, return
        //what was sent in.
        if (string == null || string.length() == 0 ) {
            return string;
        }
        char [] sArray = string.toCharArray();
        StringBuffer buf = new StringBuffer(sArray.length);
        for (int i=0; i<sArray.length; i++) {
            char ch = sArray[i];
            switch (ch) {
                case '<':
                    buf.append("&lt;");
                    break;
                case '>':
                    buf.append("&gt;");
                    break;
                case '"':
                    buf.append("&quot;");
                    break;
                case '&':
                    buf.append("&amp;");
                    break;
                default:
                    buf.append(ch);
                    break;
            }
        }
        return buf.toString();
    }

    /**
     * Unescapes the String by converting XML escape sequences back into normal
     * characters. This reverses the four substitutions that escapeForXML()
     * makes. The work is delegated to the replace() helper of this class,
     * which is presumably a replace-all of the second argument by the third
     * (NOTE(review): confirm, including how it treats a null String).
     *
     * @param string the string to unescape.
     * @return the string with appropriate characters unescaped.
     */
    public static final String unescapeFromXML(String string) {
        string = replace(string, "&lt;", "<");
        string = replace(string, "&gt;", ">");
        string = replace(string, "&quot;", "\"");
        //The ampersand entity goes last so that an escaped ampersand is not
        //turned into the start of one of the entities handled above.
        return replace(string, "&amp;", "&");
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -