📄 stringutils.java
字号:
String [] words = new String[tokens.countTokens()];
for (int i=0; i<words.length; i++) {
words[i] = tokens.nextToken().toLowerCase();
}
return words;
}
/**
 * A list of some of the most common words. For searching and indexing, we
 * often want to filter out these words since they just confuse searches.
 * The list was not created scientifically so may be incomplete :)
 */
private static final String [] commonWords = new String [] {
    "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so",
    "the", "to"
};

/**
 * Lookup table built from <code>commonWords</code> on first use by
 * removeCommonWords(). Declared volatile so that a thread that sees a
 * non-null reference also sees the fully populated map.
 */
private static volatile Map commonWordsMap = null;

/**
 * Returns a new String array with some of the most common English words
 * removed. The specific words removed are: a, and, as, at, be, do, i, if,
 * in, is, it, so, the, to<p>
 *
 * The comparison is case-sensitive, so only lower case occurrences of the
 * words above are removed. The relative order of the remaining words is
 * preserved and the argument array is not modified.
 *
 * @param words the words to filter.
 * @return a new array holding every element of <code>words</code> that is
 *      not a common word, or <code>null</code> if <code>words</code> is
 *      <code>null</code>.
 */
public static final String [] removeCommonWords(String [] words) {
    if (words == null) {
        return null;
    }
    //See if common words map has been initialized. We don't statically
    //initialize it to save some memory. Even though this a small savings,
    //it adds up with hundreds of classes being loaded.
    Map lookup = commonWordsMap;
    if (lookup == null) {
        synchronized(initLock) {
            lookup = commonWordsMap;
            if (lookup == null) {
                //Fill a local map first and publish it only once it is
                //complete. Assigning the shared field before filling it
                //would let a concurrent caller observe an empty or
                //half-filled map and return common words unfiltered.
                lookup = new HashMap();
                for (int i=0; i<commonWords.length; i++) {
                    lookup.put(commonWords[i], commonWords[i]);
                }
                commonWordsMap = lookup;
            }
        }
    }
    //Now, add all words that aren't in the common map to results
    ArrayList results = new ArrayList(words.length);
    for (int i=0; i<words.length; i++) {
        if (!lookup.containsKey(words[i])) {
            results.add(words[i]);
        }
    }
    return (String[])results.toArray(new String[results.size()]);
}
/**
 * Pseudo-random number generator object for use with randomString().
 * The Random class is not considered to be cryptographically secure, so
 * only use these random Strings for low to medium security applications.
 * Declared volatile because it doubles as the "initialized" flag for the
 * lazy set-up in randomString().
 */
private static volatile Random randGen = null;

/**
 * Array of numbers and letters of mixed case. Numbers appear in the list
 * twice so that there is a more equal chance that a number will be picked.
 * We can use the array to get a random number or letter by picking a random
 * array index.
 */
private static char[] numbersAndLetters = null;

/**
 * Returns a random String of numbers and letters of the specified length.
 * The method uses the Random class that is built-in to Java which is
 * suitable for low to medium grade security uses. This means that the
 * output is only pseudo random, i.e., each number is mathematically
 * generated so is not truly random.<p>
 *
 * Every character is picked uniformly from a 72 entry table that holds the
 * ten digits twice, the 26 lower case letters and the 26 upper case
 * letters. Each individual digit is therefore twice as likely as each
 * individual letter, and lower and upper case letters are equally
 * likely.<p>
 *
 * The specified length must be at least one. If not, the method will return
 * null.
 *
 * @param length the desired length of the random String to return.
 * @return a random String of numbers and letters of the specified length,
 *      or <code>null</code> if <code>length</code> is less than one.
 */
public static final String randomString(int length) {
    if (length < 1) {
        return null;
    }
    //Init of pseudo random number generator.
    if (randGen == null) {
        synchronized (initLock) {
            if (randGen == null) {
                //Fill the character table BEFORE publishing randGen:
                //other threads skip this block as soon as randGen is
                //non-null, so the table must already be in place by then.
                numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
                        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
                randGen = new Random();
            }
        }
    }
    char [] alphabet = numbersAndLetters;
    Random generator = randGen;
    //Create a char buffer to put random letters and numbers in.
    char [] randBuffer = new char[length];
    for (int i=0; i<randBuffer.length; i++) {
        //Bound by the table length so that every entry, including the
        //final 'Z', can be selected. nextInt(n) excludes n itself.
        randBuffer[i] = alphabet[generator.nextInt(alphabet.length)];
    }
    return new String(randBuffer);
}
/**
 * Chops a String at a line break or word boundary that occurs at the
 * specified index in the argument or before. The rules are applied in this
 * order:
 * <ol>
 *   <li>If a newline ("\n" or "\r\n") occurs within the first
 *       <code>length</code> characters, the text before that newline is
 *       returned.
 *   <li>Otherwise, if the String is shorter than <code>length</code>, it is
 *       returned unchanged.
 *   <li>Otherwise the String is cut at the last space character (' ') found
 *       before index <code>length</code> and the result is trimmed.
 *   <li>If there is no such space, the first <code>length</code> characters
 *       are returned.
 * </ol>
 * For example, chopAtWord("This is a nice String", 10) will return
 * "This is a" which is the first word boundary less than or equal to 10
 * characters into the original String.
 *
 * @param string the String to chop.
 * @param length the index in <code>string</code> to start looking for a
 *      whitespace boundary at.
 * @return a substring of <code>string</code> whose length is less than or
 *      equal to <code>length</code>; <code>string</code> itself if it is
 *      <code>null</code> or empty; the empty String if <code>length</code>
 *      is zero or negative.
 */
public static final String chopAtWord(String string, int length) {
    //Nothing to chop. Without this guard an empty String would be indexed
    //at position -1 by the newline boundary check below.
    if (string == null || string.length() == 0) {
        return string;
    }
    //No room for any character at all. Same index -1 hazard as above.
    if (length <= 0) {
        return "";
    }
    char [] charArray = string.toCharArray();
    //Number of leading characters that are inspected for a newline.
    int sLength = Math.min(string.length(), length);
    //First check if there is a newline character before length; if so,
    //chop word there.
    for (int i=0; i<sLength-1; i++) {
        //Windows
        if (charArray[i] == '\r' && charArray[i+1] == '\n') {
            return string.substring(0, i);
        }
        //Unix
        else if (charArray[i] == '\n') {
            return string.substring(0, i);
        }
    }
    //Also check boundary case of Unix newline
    if (charArray[sLength-1] == '\n') {
        return string.substring(0, sLength-1);
    }
    //Done checking for newline, now see if the total string is less than
    //the specified chop point.
    //NOTE(review): a String of exactly <length> characters is not caught
    //here and is still cut at its last space below -- confirm whether
    //"<=" was intended before changing it.
    if (string.length() < length) {
        return string;
    }
    //No newline, so chop at the last space before the chop point. Index 0
    //is deliberately not considered so the result is never cut to nothing
    //by a leading space.
    for (int i = length-1; i > 0; i--) {
        if (charArray[i] == ' ') {
            return string.substring(0, i).trim();
        }
    }
    //Did not find word boundary so return original String chopped at
    //specified length.
    return string.substring(0, length);
}
/**
 * Highlights words in a string. Word matching ignores case. The actual
 * highlighting method is specified with the start and end highlight tags.
 * Those might be beginning and ending HTML bold tags, or anything else.<p>
 *
 * Only distinct words are highlighted: an occurrence is wrapped in the tags
 * when it is neither immediately preceded nor immediately followed by a
 * letter (the start and the end of the string count as boundaries). All
 * other characters of the input, including whitespace, are copied through
 * unchanged.<p>
 *
 * The words are processed one after another, each against the result of
 * the previous one, so a later word can also match text inside tags that
 * were inserted for an earlier word. <code>null</code> and empty entries
 * in <code>words</code> are skipped.
 *
 * @param string the String to highlight words in.
 * @param words an array of words that should be highlighted in the string.
 * @param startHighlight the tag that should be inserted to start highlighting.
 * @param endHighlight the tag that should be inserted to end highlighting.
 * @return a new String with the specified words highlighted, or
 *      <code>null</code> if any argument is <code>null</code>.
 */
public static final String highlightWords(String string, String[] words,
        String startHighlight, String endHighlight)
{
    if (string == null || words == null ||
            startHighlight == null || endHighlight == null)
    {
        return null;
    }
    //Iterate through each word.
    for (int x=0; x<words.length; x++) {
        String word = words[x];
        //An empty word matches everywhere and a null one cannot be matched.
        if (word == null || word.length() == 0) {
            continue;
        }
        string = highlightSingleWord(string, word, startHighlight, endHighlight);
    }
    return string;
}

/**
 * Wraps every stand-alone, case-insensitive occurrence of one word in the
 * given tags. Occurrences are searched left to right without overlap.
 */
private static String highlightSingleWord(String string, String word,
        String startHighlight, String endHighlight)
{
    int wordLength = word.length();
    int stringLength = string.length();
    //Allocated lazily so that a string without any hit is returned as is.
    StringBuffer buf = null;
    //Index of the first character of string not yet copied into buf.
    int copiedTo = 0;
    int i = 0;
    while (i + wordLength <= stringLength) {
        //Compare in place instead of searching a lower-cased copy: lower
        //casing can change the length of a string, which would make the
        //match offsets point at the wrong characters of the original.
        if (!string.regionMatches(true, i, word, 0, wordLength)) {
            i++;
            continue;
        }
        int end = i + wordLength;
        //we only want to highlight distinct words and not parts of
        //larger words.
        boolean startsAtBoundary =
                (i == 0) || !Character.isLetter(string.charAt(i-1));
        boolean endsAtBoundary =
                (end == stringLength) || !Character.isLetter(string.charAt(end));
        if (startsAtBoundary && endsAtBoundary) {
            if (buf == null) {
                buf = new StringBuffer(stringLength +
                        startHighlight.length() + endHighlight.length());
            }
            buf.append(string.substring(copiedTo, i));
            buf.append(startHighlight);
            //Copy from the original so the word keeps its own casing.
            buf.append(string.substring(i, end));
            buf.append(endHighlight);
            copiedTo = end;
        }
        //Continue behind this occurrence whether it was highlighted or not.
        i = end;
    }
    if (buf == null) {
        return string;
    }
    buf.append(string.substring(copiedTo));
    return buf.toString();
}
/**
* Escapes all necessary characters in the String so that it can be used
* in an XML doc.
*
* @param string the string to escape.
* @return the string with appropriate characters escaped.
*/
public static final String escapeForXML(String string) {
//Check if the string is null or zero length -- if so, return
//what was sent in.
if (string == null || string.length() == 0 ) {
return string;
}
char [] sArray = string.toCharArray();
StringBuffer buf = new StringBuffer(sArray.length);
char ch;
for (int i=0; i<sArray.length; i++) {
ch = sArray[i];
if(ch == '<') {
buf.append("<");
}
else if (ch == '&') {
buf.append("&");
}
else if (ch == '"') {
buf.append(""");
}
else {
buf.append(ch);
}
}
return buf.toString();
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -