📄 characterentityreference.java
字号:
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.0
// Copyright (C) 2007 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.util.*;
import java.io.*;
/**
* Represents an HTML <a target="_blank" href="http://www.w3.org/TR/REC-html40/charset.html#h-5.3.2">Character Entity Reference</a>.
* <p>
* <b>Click <a href="#method_summary">here</a> to scroll down to the method summary.</b>
* <p>
* The full list of HTML character entity references can be found at the following URL:<br />
* <a target="_blank" href="http://www.w3.org/TR/REC-html40/sgml/entities.html">http://www.w3.org/TR/REC-html40/sgml/entities.html</a>.
* <p>
* There are a total of 253 HTML character entity references, ranging from codepoints U+0022 to U+2666.
* <p>
* Static methods to {@linkplain #encode(CharSequence) encode} and {@linkplain #decode(CharSequence) decode} strings
* and single characters can be found in the {@link CharacterReference} superclass.
* <p>
* The {@link #_apos &amp;apos;} entity reference is not defined for use in HTML.
* It is defined in the <a target="_blank" href="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>,
* and is the only one that is not included in both HTML and XHTML.
* For this reason, the <code>&amp;apos;</code> entity reference is recognised by this library in decoding functions, but in encoding functions
* the numeric character reference <code>&amp;#39;</code> is used instead.
* Most modern browsers support it in both XHTML and HTML, with the notable exception
* of Microsoft Internet Explorer 6.0, which doesn't support it in either.
* <p>
* <code>CharacterEntityReference</code> instances are obtained using one of the following methods:
* <ul>
* <li>{@link CharacterReference#parse(CharSequence characterReferenceText)}
* <li>{@link Source#getNextCharacterReference(int pos)}
* <li>{@link Source#getPreviousCharacterReference(int pos)}
* <li>{@link Segment#getAllCharacterReferences()}
* </ul>
*
* @see CharacterReference
* @see NumericCharacterReference
*/
public class CharacterEntityReference extends CharacterReference {
/**
 * Name associated with this character entity reference.
 * NOTE(review): presumably the entity name without the leading '&amp;' and trailing ';' (e.g. "nbsp");
 * confirm against the constructor and accessor, which are not visible in this part of the file.
 */
private String name;
// Character constants for the entity references covering code points U+00A0 to U+00BF.
// Each Javadoc shows: sample glyph, entity reference = numeric reference, description, code point and source entity set.
/** <samp>&nbsp;</samp> <code>&amp;nbsp; = &amp;#160;</code> -- no-break space = non-breaking space, U+00A0 ISOnum. */
public static final char _nbsp='\u00A0';
/** <samp>¡</samp> <code>&amp;iexcl; = &amp;#161;</code> -- inverted exclamation mark, U+00A1 ISOnum. */
public static final char _iexcl='\u00A1';
/** <samp>¢</samp> <code>&amp;cent; = &amp;#162;</code> -- cent sign, U+00A2 ISOnum. */
public static final char _cent='\u00A2';
/** <samp>£</samp> <code>&amp;pound; = &amp;#163;</code> -- pound sign, U+00A3 ISOnum. */
public static final char _pound='\u00A3';
/** <samp>¤</samp> <code>&amp;curren; = &amp;#164;</code> -- currency sign, U+00A4 ISOnum. */
public static final char _curren='\u00A4';
/** <samp>¥</samp> <code>&amp;yen; = &amp;#165;</code> -- yen sign = yuan sign, U+00A5 ISOnum. */
public static final char _yen='\u00A5';
/** <samp>¦</samp> <code>&amp;brvbar; = &amp;#166;</code> -- broken bar = broken vertical bar, U+00A6 ISOnum. */
public static final char _brvbar='\u00A6';
/** <samp>§</samp> <code>&amp;sect; = &amp;#167;</code> -- section sign, U+00A7 ISOnum. */
public static final char _sect='\u00A7';
/** <samp>¨</samp> <code>&amp;uml; = &amp;#168;</code> -- diaeresis = spacing diaeresis, U+00A8 ISOdia. */
public static final char _uml='\u00A8';
/** <samp>©</samp> <code>&amp;copy; = &amp;#169;</code> -- copyright sign, U+00A9 ISOnum. */
public static final char _copy='\u00A9';
/** <samp>ª</samp> <code>&amp;ordf; = &amp;#170;</code> -- feminine ordinal indicator, U+00AA ISOnum. */
public static final char _ordf='\u00AA';
/** <samp>«</samp> <code>&amp;laquo; = &amp;#171;</code> -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum. */
public static final char _laquo='\u00AB';
/** <samp>¬</samp> <code>&amp;not; = &amp;#172;</code> -- not sign = angled dash, U+00AC ISOnum. */
public static final char _not='\u00AC';
/** <samp>&shy;</samp> <code>&amp;shy; = &amp;#173;</code> -- soft hyphen = discretionary hyphen, U+00AD ISOnum. */
public static final char _shy='\u00AD';
/** <samp>®</samp> <code>&amp;reg; = &amp;#174;</code> -- registered sign = registered trade mark sign, U+00AE ISOnum. */
public static final char _reg='\u00AE';
/** <samp>¯</samp> <code>&amp;macr; = &amp;#175;</code> -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia. */
public static final char _macr='\u00AF';
/** <samp>°</samp> <code>&amp;deg; = &amp;#176;</code> -- degree sign, U+00B0 ISOnum. */
public static final char _deg='\u00B0';
/** <samp>±</samp> <code>&amp;plusmn; = &amp;#177;</code> -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum. */
public static final char _plusmn='\u00B1';
/** <samp>²</samp> <code>&amp;sup2; = &amp;#178;</code> -- superscript two = superscript digit two = squared, U+00B2 ISOnum. */
public static final char _sup2='\u00B2';
/** <samp>³</samp> <code>&amp;sup3; = &amp;#179;</code> -- superscript three = superscript digit three = cubed, U+00B3 ISOnum. */
public static final char _sup3='\u00B3';
/** <samp>´</samp> <code>&amp;acute; = &amp;#180;</code> -- acute accent = spacing acute, U+00B4 ISOdia. */
public static final char _acute='\u00B4';
/** <samp>µ</samp> <code>&amp;micro; = &amp;#181;</code> -- micro sign, U+00B5 ISOnum. */
public static final char _micro='\u00B5';
/** <samp>¶</samp> <code>&amp;para; = &amp;#182;</code> -- pilcrow sign = paragraph sign, U+00B6 ISOnum. */
public static final char _para='\u00B6';
/** <samp>·</samp> <code>&amp;middot; = &amp;#183;</code> -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum. */
public static final char _middot='\u00B7';
/** <samp>¸</samp> <code>&amp;cedil; = &amp;#184;</code> -- cedilla = spacing cedilla, U+00B8 ISOdia. */
public static final char _cedil='\u00B8';
/** <samp>¹</samp> <code>&amp;sup1; = &amp;#185;</code> -- superscript one = superscript digit one, U+00B9 ISOnum. */
public static final char _sup1='\u00B9';
/** <samp>º</samp> <code>&amp;ordm; = &amp;#186;</code> -- masculine ordinal indicator, U+00BA ISOnum. */
public static final char _ordm='\u00BA';
/** <samp>»</samp> <code>&amp;raquo; = &amp;#187;</code> -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum. */
public static final char _raquo='\u00BB';
/** <samp>¼</samp> <code>&amp;frac14; = &amp;#188;</code> -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum. */
public static final char _frac14='\u00BC';
/** <samp>½</samp> <code>&amp;frac12; = &amp;#189;</code> -- vulgar fraction one half = fraction one half, U+00BD ISOnum. */
public static final char _frac12='\u00BD';
/** <samp>¾</samp> <code>&amp;frac34; = &amp;#190;</code> -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum. */
public static final char _frac34='\u00BE';
/** <samp>¿</samp> <code>&amp;iquest; = &amp;#191;</code> -- inverted question mark = turned question mark, U+00BF ISOnum. */
public static final char _iquest='\u00BF';
// Character constants for the entity references covering code points U+00C0 to U+00DE
// (Latin capital letters, plus the multiplication sign at U+00D7).
/** <samp>À</samp> <code>&amp;Agrave; = &amp;#192;</code> -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1. */
public static final char _Agrave='\u00C0';
/** <samp>Á</samp> <code>&amp;Aacute; = &amp;#193;</code> -- latin capital letter A with acute, U+00C1 ISOlat1. */
public static final char _Aacute='\u00C1';
/** <samp>Â</samp> <code>&amp;Acirc; = &amp;#194;</code> -- latin capital letter A with circumflex, U+00C2 ISOlat1. */
public static final char _Acirc='\u00C2';
/** <samp>Ã</samp> <code>&amp;Atilde; = &amp;#195;</code> -- latin capital letter A with tilde, U+00C3 ISOlat1. */
public static final char _Atilde='\u00C3';
/** <samp>Ä</samp> <code>&amp;Auml; = &amp;#196;</code> -- latin capital letter A with diaeresis, U+00C4 ISOlat1. */
public static final char _Auml='\u00C4';
/** <samp>Å</samp> <code>&amp;Aring; = &amp;#197;</code> -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1. */
public static final char _Aring='\u00C5';
/** <samp>Æ</samp> <code>&amp;AElig; = &amp;#198;</code> -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1. */
public static final char _AElig='\u00C6';
/** <samp>Ç</samp> <code>&amp;Ccedil; = &amp;#199;</code> -- latin capital letter C with cedilla, U+00C7 ISOlat1. */
public static final char _Ccedil='\u00C7';
/** <samp>È</samp> <code>&amp;Egrave; = &amp;#200;</code> -- latin capital letter E with grave, U+00C8 ISOlat1. */
public static final char _Egrave='\u00C8';
/** <samp>É</samp> <code>&amp;Eacute; = &amp;#201;</code> -- latin capital letter E with acute, U+00C9 ISOlat1. */
public static final char _Eacute='\u00C9';
/** <samp>Ê</samp> <code>&amp;Ecirc; = &amp;#202;</code> -- latin capital letter E with circumflex, U+00CA ISOlat1. */
public static final char _Ecirc='\u00CA';
/** <samp>Ë</samp> <code>&amp;Euml; = &amp;#203;</code> -- latin capital letter E with diaeresis, U+00CB ISOlat1. */
public static final char _Euml='\u00CB';
/** <samp>Ì</samp> <code>&amp;Igrave; = &amp;#204;</code> -- latin capital letter I with grave, U+00CC ISOlat1. */
public static final char _Igrave='\u00CC';
/** <samp>Í</samp> <code>&amp;Iacute; = &amp;#205;</code> -- latin capital letter I with acute, U+00CD ISOlat1. */
public static final char _Iacute='\u00CD';
/** <samp>Î</samp> <code>&amp;Icirc; = &amp;#206;</code> -- latin capital letter I with circumflex, U+00CE ISOlat1. */
public static final char _Icirc='\u00CE';
/** <samp>Ï</samp> <code>&amp;Iuml; = &amp;#207;</code> -- latin capital letter I with diaeresis, U+00CF ISOlat1. */
public static final char _Iuml='\u00CF';
/** <samp>Ð</samp> <code>&amp;ETH; = &amp;#208;</code> -- latin capital letter ETH, U+00D0 ISOlat1. */
public static final char _ETH='\u00D0';
/** <samp>Ñ</samp> <code>&amp;Ntilde; = &amp;#209;</code> -- latin capital letter N with tilde, U+00D1 ISOlat1. */
public static final char _Ntilde='\u00D1';
/** <samp>Ò</samp> <code>&amp;Ograve; = &amp;#210;</code> -- latin capital letter O with grave, U+00D2 ISOlat1. */
public static final char _Ograve='\u00D2';
/** <samp>Ó</samp> <code>&amp;Oacute; = &amp;#211;</code> -- latin capital letter O with acute, U+00D3 ISOlat1. */
public static final char _Oacute='\u00D3';
/** <samp>Ô</samp> <code>&amp;Ocirc; = &amp;#212;</code> -- latin capital letter O with circumflex, U+00D4 ISOlat1. */
public static final char _Ocirc='\u00D4';
/** <samp>Õ</samp> <code>&amp;Otilde; = &amp;#213;</code> -- latin capital letter O with tilde, U+00D5 ISOlat1. */
public static final char _Otilde='\u00D5';
/** <samp>Ö</samp> <code>&amp;Ouml; = &amp;#214;</code> -- latin capital letter O with diaeresis, U+00D6 ISOlat1. */
public static final char _Ouml='\u00D6';
/** <samp>×</samp> <code>&amp;times; = &amp;#215;</code> -- multiplication sign, U+00D7 ISOnum. */
public static final char _times='\u00D7';
/** <samp>Ø</samp> <code>&amp;Oslash; = &amp;#216;</code> -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1. */
public static final char _Oslash='\u00D8';
/** <samp>Ù</samp> <code>&amp;Ugrave; = &amp;#217;</code> -- latin capital letter U with grave, U+00D9 ISOlat1. */
public static final char _Ugrave='\u00D9';
/** <samp>Ú</samp> <code>&amp;Uacute; = &amp;#218;</code> -- latin capital letter U with acute, U+00DA ISOlat1. */
public static final char _Uacute='\u00DA';
/** <samp>Û</samp> <code>&amp;Ucirc; = &amp;#219;</code> -- latin capital letter U with circumflex, U+00DB ISOlat1. */
public static final char _Ucirc='\u00DB';
/** <samp>Ü</samp> <code>&amp;Uuml; = &amp;#220;</code> -- latin capital letter U with diaeresis, U+00DC ISOlat1. */
public static final char _Uuml='\u00DC';
/** <samp>Ý</samp> <code>&amp;Yacute; = &amp;#221;</code> -- latin capital letter Y with acute, U+00DD ISOlat1. */
public static final char _Yacute='\u00DD';
/** <samp>Þ</samp> <code>&amp;THORN; = &amp;#222;</code> -- latin capital letter THORN, U+00DE ISOlat1. */
public static final char _THORN='\u00DE';
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -