📄 charactertranslationtest.java
字号:
// NOTE(review): this chunk opens partway through a method of the generator
// class (presumably the helper invoked as pretty() further down); its
// signature and the declarations of ret, index, spaces and nl are not visible.
            ret = new StringBuffer (string.length ());
            // newline instead of doublespaces
            // NOTE(review): the search string below is a single space although the
            // comment above speaks of double spaces; whitespace may have been
            // collapsed when this listing was extracted - confirm against upstream.
            index = 0;
            while ((index < string.length ()) && (-1 != (spaces = string.indexOf (" ", index))))
            {
                ret.append (" // " + string.substring (index, spaces));
                if (!string.substring (index, spaces).endsWith (nl))
                    ret.append (nl);
                while ((spaces < string.length ()) && Character.isWhitespace (string.charAt (spaces)))
                    spaces++;
                index = spaces;
            }
            if (index < string.length ())
                ret.append (" // " + string.substring (index));

            return (ret.toString ());
        }

        /**
         * Pad a string on the left with the given character to the length specified.
         * A string that is already at least <code>length</code> characters long
         * (including the case of a zero or negative <code>length</code>) is
         * returned unchanged.
         * @param string The string to pad
         * @param character The character to pad with.
         * @param length The size to pad to.
         * @return The padded string.
         */
        public String pad (String string, char character, int length)
        {
            int missing;
            StringBuffer ret;

            missing = length - string.length ();
            if (missing <= 0)
            {
                // nothing to add; this also avoids allocating a negative-capacity buffer
                return (string);
            }
            ret = new StringBuffer (length);
            // emit the padding first so nothing has to be shifted by insert (0, ...)
            for (int i = 0; i < missing; i++)
            {
                ret.append (character);
            }
            ret.append (string);

            return (ret.toString ());
        }

        /**
         * Convert the textual representation of the numeric character reference to a character.
         * A string of the form <code>"&amp;#NNN;"</code> (quotes included) becomes a
         * Java character literal <code>'&#92;uXXXX'</code> with the code in hex,
         * zero padded to at least four digits. If the digits are not a valid
         * decimal integer the stack trace is printed and the bare text between
         * <code>&amp;#</code> and <code>;</code> is returned. Any other string is
         * returned untouched.
         * @param string The numeric character reference (in quotes).
         * @return The character represented by the numeric character reference.
         */
        public String unicode (String string)
        {
            int code;
            String digits;

            if (string.startsWith ("\"&#") && string.endsWith (";\""))
            {
                // strip the leading "&# (3 characters) and the trailing ;" (2 characters)
                digits = string.substring (3, string.length () - 2);
                try
                {
                    code = Integer.parseInt (digits);
                    return ("'\\u" + pad (Integer.toHexString (code), '0', 4) + "'");
                }
                catch (NumberFormatException nfe)
                {
                    // parseInt is the only call here that can fail
                    nfe.printStackTrace ();
                    return (digits);
                }
            }

            return (string);
        }

        /**
         * Parse the sgml declaration for character entity reference
         * name, equivalent numeric character reference and a comment.
         * Emit a java hash table 'put' with the name as the key, the
         * numeric character as the value and comment the insertion
         * with the comment.
         * Comment declarations are printed via pretty(); anything that does
         * not match the expected <code>&lt;!ENTITY name CDATA "code" ...</code>
         * layout is printed as far as it was parsed.
         * @param string The contents of the sgml declaration.
         * @param out The sink for output.
         */
        public void extract (String string, PrintWriter out)
        {
            int space;
            String token;
            String code;

            if (string.startsWith ("<!--"))
            {
                // in "<!-->" the opener and the closing "-->" overlap, so clamp
                // the end index to keep the slice from being inverted
                out.println (pretty (string.substring (4, Math.max (4, string.length () - 3)).trim ()));
                return;
            }
            if (!string.startsWith ("<!ENTITY"))
            {
                out.println (string);
                return;
            }

            // drop the "<!ENTITY" prefix and the three character terminator
            string = string.substring (8, Math.max (8, string.length () - 3)).trim ();
            space = string.indexOf (" ");
            if (-1 == space)
            {
                out.println (string);
                return;
            }
            token = string.substring (0, space);
            string = string.substring (space).trim ();
            if (!string.startsWith ("CDATA"))
            {
                out.println (string);
                return;
            }
            string = string.substring (5).trim ();
            space = string.indexOf (" ");
            if (-1 == space)
            {
                out.println (string);
                return;
            }
            code = unicode (string.substring (0, space).trim ());
            string = string.substring (space).trim ();
            out.println (
                " new CharacterReference (\"" + token + "\","
                // no token is larger than 8 characters - yet
                + pad (code, ' ', code.length () + 9 - token.length ()) + "),"
                + " // "
                + pack (string));
        }

        /**
         * Extract special characters.
         * Scan the string looking for substrings of the form:
         * <pre>
         * &lt;!ENTITY nbsp CDATA "&amp;#160;" -- no-break space = non-breaking space, U+00A0 ISOnum --&gt;
         * </pre>
         * and emit a java definition for each.
         * Every span from a <code>&lt;</code> to the next <code>--&gt;</code> is
         * handed to {@link #extract}.
         * @param string The raw string from w3.org.
         * @param out The sink for output.
         */
        public void sgml (String string, PrintWriter out)
        {
            int index;
            int begin;
            int end;

            index = 0;
            while (-1 != (begin = string.indexOf ("<", index)))
            {
                end = string.indexOf ("-->", begin);
                if (-1 == end)
                {
                    // no terminator from here on means none after any later '<' either
                    break;
                }
                extract (string.substring (begin, end + 3), out);
                index = end + 3;
            }
        }

        /**
         * Pull out text elements from the HTML.
         * The text gathered from every node is translated and then scanned
         * by {@link #sgml} for declarations.
         * @param out The sink for output.
         * @exception ParserException Propagated from the parser calls made
         * while walking the nodes.
         */
        public void parse (PrintWriter out)
            throws
                ParserException
        {
            Node node;
            StringBuffer buffer = new StringBuffer (4096);

            // Run through an enumeration of html elements, and pick up
            // only those that are plain string.
            for (NodeIterator e = mParser.elements (); e.hasMoreNodes ();)
            {
                node = e.nextNode ();
                gather (node, buffer);
            }

            String text = translate (buffer.toString ());
            sgml (text, out);
        }
    }

    /**
     * Get the table of character references, building it on first use.
     * When the cached table is absent, a class named
     * <code>CharacterEntityReferenceList</code> is written into the
     * <code>user.home</code> directory, compiled with javac against the
     * current class path, loaded, and its static
     * <code>mCharacterReferences</code> array is read, sorted and cached.
     * The compiled class file is deleted afterwards. Any failure along the
     * way is reported through <code>fail()</code>.
     * @return The sorted references.
     */
    public CharacterReference[] getReferences ()
    {
        final String class_name = "CharacterEntityReferenceList";
        String paths;
        String path;
        String source;
        PrintWriter out;
        Generate generate;
        SimpleClassLoader loader;
        Class hello;
        Field field;
        CharacterReference[] ret;

        ret = mReferences;
        if (null == ret)
        {
            paths = System.getProperty ("java.class.path");
            path = System.getProperty ("user.home");
            if (!path.endsWith (File.separator))
            {
                path += File.separator;
            }
            source = path + class_name + ".java";
            try
            {
                // create it
                generate = new Generate ();
                out = new PrintWriter (new FileWriter (source));
                try
                {
                    out.println ("import org.htmlparser.util.CharacterReference;");
                    out.println ();
                    out.println ("/** Generated by " + this.getClass ().getName () + " **/");
                    out.println ("public class " + class_name);
                    out.println ("{");
                    out.println (" /**");
                    out.println (" * Table mapping character to entity reference.");
                    out.println (" */");
                    out.println (" public static final CharacterReference[] mCharacterReferences =");
                    out.println (" {");
                    generate.parse (out);
                    out.println (" };");
                    out.println ("}");
                }
                finally
                {
                    // release the file handle even when parsing fails part way through
                    out.close ();
                }
                // compile it
                if (0 == com.sun.tools.javac.Main.compile (new String[] {"-classpath", paths, source}))
                {
                    try
                    {
                        // load it
                        loader = new SimpleClassLoader (path);
                        hello = loader.loadClass (class_name);
                        try
                        {
                            // get the references
                            field = hello.getField ("mCharacterReferences");
                            ret = (CharacterReference[])field.get (null);
                            Sort.QuickSort (ret);
                        }
                        catch (IllegalAccessException iae)
                        {
                            fail ("references not accessible");
                        }
                        catch (NoSuchFieldException nsfe)
                        {
                            fail ("references not found");
                        }
                    }
                    catch (ClassNotFoundException cnfe)
                    {
                        fail ("couldn't load class");
                    }
                    finally
                    {
                        File classfile;

                        classfile = new File (path + class_name + ".class");
                        classfile.delete ();
                    }
                }
                else
                {
                    fail ("couldn't compile class");
                }
                mReferences = ret;
            }
            catch (IOException ioe)
            {
                fail ("couldn't write class");
            }
            catch (ParserException pe)
            {
                fail ("couldn't parse w3.org entities list");
            }
        }

        return (ret);
    }

    // NOTE(review): every decode() argument in the tests below is the literal
    // division sign rather than an entity or numeric character reference, so
    // the inputs no longer differ from test to test as the method names imply.
    // The references were presumably decoded when this listing was extracted;
    // restore the original literals from the upstream source before relying
    // on these tests.

    /**
     * Test decoding a character entity reference at the start of a string.
     */
    public void testInitialCharacterEntityReference ()
    {
        assertEquals (
            "character entity reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a numeric character reference at the start of a string (variant 1).
     */
    public void testInitialNumericCharacterReference1 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a numeric character reference at the start of a string (variant 2).
     */
    public void testInitialNumericCharacterReference2 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 1).
     */
    public void testInitialHexNumericCharacterReference1 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 2).
     */
    public void testInitialHexNumericCharacterReference2 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 3).
     */
    public void testInitialHexNumericCharacterReference3 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 4).
     */
    public void testInitialHexNumericCharacterReference4 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 5).
     */
    public void testInitialHexNumericCharacterReference5 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 6).
     */
    public void testInitialHexNumericCharacterReference6 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 7).
     */
    public void testInitialHexNumericCharacterReference7 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a hex numeric character reference at the start of a string (variant 8).
     */
    public void testInitialHexNumericCharacterReference8 ()
    {
        assertEquals (
            "numeric character reference at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }

    /**
     * Test decoding a character entity reference that lacks its semicolon
     * at the start of a string.
     */
    public void testInitialCharacterEntityReferenceWithoutSemi ()
    {
        assertEquals (
            "character entity reference without a semicolon at start of string doesn't work",
            "\u00f7 is the division sign.",
            Translate.decode ("÷ is the division sign."));
    }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -