📄 translate.java
字号:
num = (hi - lo) + 1; while ((-1 == ret) && (lo <= hi)) { half = num / 2; mid = lo + ((0 != (num & 1)) ? half : half - 1); result = ref - array[mid].getCharacter (); if (0 == result) ret = mid; else if (0 > result) { hi = mid - 1; num = ((0 != (num & 1)) ? half : half - 1); } else { lo = mid + 1; num = half; } } if (-1 == ret) ret = lo; return (ret); } /** * Look up a reference by character. * Use a combination of direct table lookup and binary search to find * the reference corresponding to the character. * @param character The character to be looked up. * @return The entity reference for that character or <code>null</code>. */ public static CharacterReference lookup (char character) { int index; CharacterReference ret; if (character < BREAKPOINT) ret = mCharacterList[character]; else { index = lookup (mCharacterList, character, BREAKPOINT, mCharacterList.length - 1); if (index < mCharacterList.length) { ret = mCharacterList[index]; if (character != ret.getCharacter ()) ret = null; } else ret = null; } return (ret); } /** * Look up a reference by kernel. * Use a binary search on the ordered list of known references. * Since the binary search returns the position at which a new item should * be inserted, we check the references earlier in the list if there is * a failure. * @param key A character reference with the kernel set to the string * to be found. It need not be truncated at the exact end of the reference. */ protected static CharacterReference lookup (CharacterReference key) { String string; int index; String kernel; char character; CharacterReference test; CharacterReference ret; // Care should be taken here because some entity references are // prefixes of others, i.e.: // \u2209[notin] \u00ac[not] // \u00ba[ordm] \u2228[or] // \u03d6[piv] \u03c0[pi] // \u00b3[sup3] \u2283[sup] ret = null; index = Sort.bsearch (mCharacterReferences, key); string = key.getKernel (); if (index < mCharacterReferences.length) { ret = mCharacterReferences[index]; kernel = ret.getKernel (); if (!string.regionMatches ( 0, kernel, 0, kernel.length ())) { // not exact, check references starting with same character // to see if a subset matches ret = null; } } if (null == ret) { character = string.charAt (0); while (--index >= 0) { test = mCharacterReferences[index]; kernel = test.getKernel (); if (character == kernel.charAt (0)) { if (string.regionMatches ( 0, kernel, 0, kernel.length ())) { ret = test; break; } } else break; } } return (ret); } /** * Look up a reference by kernel. * Use a binary search on the ordered list of known references. * <em>This is not very efficient, use {@link org.htmlparser.util.Translate#lookup(org.htmlparser.util.CharacterReference) lookup(CharacterReference)} * instead.</em> * @param kernel The string to lookup, i.e. "amp". * @param start The starting point in the string of the kernel. * @param end The ending point in the string of the kernel. * This should be the index of the semicolon if it exists, or failing that, * at least an index past the last character of the kernel. * @return The reference that matches the given string, or <code>null</code> * if it wasn't found. */ public static CharacterReference lookup (String kernel, int start, int end) { CharacterReferenceEx probe; probe = new CharacterReferenceEx (); probe.setKernel (kernel); probe.setStart (start); probe.setEnd (end); return (lookup (probe)); } /** * Decode a string containing references. * Change all numeric character reference and character entity references * to unicode characters. * @param string The string to translate. */ public static String decode (String string) { CharacterReferenceEx key; int amp; int index; int length; StringBuffer buffer; char character; int number; int radix; int i; int semi; boolean done; CharacterReference item; String ret; if (-1 == (amp = string.indexOf ('&'))) ret = string; else { key = null; index = 0; length = string.length (); buffer = new StringBuffer (length); do { // equivalent to buffer.append (string.substring (index, amp)); // but without the allocation of a new String while (index < amp) buffer.append (string.charAt (index++)); index++; if (index < length) { character = string.charAt (index); if ('#' == character) { // numeric character reference index++; number = 0; radix = 0; i = index; done = false; while ((i < length) && !done) { character = string.charAt (i); switch (character) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (0 == radix) radix = 10; number = number * radix + (character - '0'); break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': if (16 == radix) number = number * radix + (character - 'A' + 10); else done = true; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': if (16 == radix) number = number * radix + (character - 'a' + 10); else done = true; break; case 'x': case 'X': if (0 == radix) radix = 16; else done = true; break; case ';': done = true; i++; break; default: done = true; break; } if (!done) i++; } if (0 != number) { buffer.append ((char)number); index = i; amp = index; } } else if (Character.isLetter (character)) // really can't start with a digit eh... { // character entity reference i = index + 1; done = false; semi = length; while ((i < length) && !done) { character = string.charAt (i); if (';' == character) { done = true; semi = i; i++; } else if (Character.isLetterOrDigit (character)) i++; else { done = true; semi = i; } } // new CharacterReference (string.substring (index, semi), 0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -