⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 characterentityreference.java

📁 HTML解析器是一个Java库
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.0
// Copyright (C) 2007 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;
import java.io.*;

/**
 * Represents an HTML <a target="_blank" href="http://www.w3.org/TR/REC-html40/charset.html#h-5.3.2">Character Entity Reference</a>.
 * <p>
 * <b>Click <a href="#method_summary">here</a> to scroll down to the method summary.</b>
 * <p>
 * The full list of HTML character entity references can be found at the following URL:<br />
 * <a target="_blank" href="http://www.w3.org/TR/REC-html40/sgml/entities.html">http://www.w3.org/TR/REC-html40/sgml/entities.html</a>.
 * <p>
 * There are a total of 253 HTML character entity references, ranging from codepoints U+0022 to U+2666.
 * <p>
 * Static methods to {@linkplain #encode(CharSequence) encode} and {@linkplain #decode(CharSequence) decode} strings
 * and single characters can be found in the {@link CharacterReference} superclass.
 * <p>
 * The {@link #_apos &amp;apos;} entity reference is not defined for use in HTML.
 * It is defined in the <a target="_blank" href="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>,
 * and is the only one that is not included in both HTML and XHTML.
 * For this reason, the <code>&amp;apos;</code> entity reference is recognised by this library in decoding functions, but in encoding functions
 * the numeric character reference <code>&amp;#39;</code> is used instead.
 * Most modern browsers support <code>&amp;apos;</code> in both XHTML and HTML, with the notable exception
 * of Microsoft Internet Explorer 6.0, which doesn't support it in either.
 * <p>
 * <code>CharacterEntityReference</code> instances are obtained using one of the following methods:
 * <ul>
 *  <li>{@link CharacterReference#parse(CharSequence characterReferenceText)}
 *  <li>{@link Source#getNextCharacterReference(int pos)}
 *  <li>{@link Source#getPreviousCharacterReference(int pos)}
 *  <li>{@link Segment#getAllCharacterReferences()}
 * </ul>
 *
 * @see CharacterReference
 * @see NumericCharacterReference
 */
public class CharacterEntityReference extends CharacterReference {
	private String name;

	// Character entity constants.
	// Each field is named after its character entity reference with a leading underscore
	// (for example _nbsp for the nbsp entity) and holds the single character that the
	// entity represents. The constants in this section are listed in ascending codepoint order.
	// In each javadoc comment the <samp> element contains the raw entity so that the generated
	// HTML displays the character itself, while the <code> element escapes the ampersand so
	// that the entity name and its decimal numeric equivalent are displayed as literal text.
	/** <samp>&nbsp;</samp> <code>&amp;nbsp; = &amp;#160;</code> -- no-break space = non-breaking space, U+00A0 ISOnum. */
	public static final char _nbsp='\u00A0';
	/** <samp>&iexcl;</samp> <code>&amp;iexcl; = &amp;#161;</code> -- inverted exclamation mark, U+00A1 ISOnum. */
	public static final char _iexcl='\u00A1';
	/** <samp>&cent;</samp> <code>&amp;cent; = &amp;#162;</code> -- cent sign, U+00A2 ISOnum. */
	public static final char _cent='\u00A2';
	/** <samp>&pound;</samp> <code>&amp;pound; = &amp;#163;</code> -- pound sign, U+00A3 ISOnum. */
	public static final char _pound='\u00A3';
	/** <samp>&curren;</samp> <code>&amp;curren; = &amp;#164;</code> -- currency sign, U+00A4 ISOnum. */
	public static final char _curren='\u00A4';
	/** <samp>&yen;</samp> <code>&amp;yen; = &amp;#165;</code> -- yen sign = yuan sign, U+00A5 ISOnum. */
	public static final char _yen='\u00A5';
	/** <samp>&brvbar;</samp> <code>&amp;brvbar; = &amp;#166;</code> -- broken bar = broken vertical bar, U+00A6 ISOnum. */
	public static final char _brvbar='\u00A6';
	/** <samp>&sect;</samp> <code>&amp;sect; = &amp;#167;</code> -- section sign, U+00A7 ISOnum. */
	public static final char _sect='\u00A7';
	/** <samp>&uml;</samp> <code>&amp;uml; = &amp;#168;</code> -- diaeresis = spacing diaeresis, U+00A8 ISOdia. */
	public static final char _uml='\u00A8';
	/** <samp>&copy;</samp> <code>&amp;copy; = &amp;#169;</code> -- copyright sign, U+00A9 ISOnum. */
	public static final char _copy='\u00A9';
	/** <samp>&ordf;</samp> <code>&amp;ordf; = &amp;#170;</code> -- feminine ordinal indicator, U+00AA ISOnum. */
	public static final char _ordf='\u00AA';
	/** <samp>&laquo;</samp> <code>&amp;laquo; = &amp;#171;</code> -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum. */
	public static final char _laquo='\u00AB';
	/** <samp>&not;</samp> <code>&amp;not; = &amp;#172;</code> -- not sign = angled dash, U+00AC ISOnum. */
	public static final char _not='\u00AC';
	/** <samp>&shy;</samp> <code>&amp;shy; = &amp;#173;</code> -- soft hyphen = discretionary hyphen, U+00AD ISOnum. */
	public static final char _shy='\u00AD';
	/** <samp>&reg;</samp> <code>&amp;reg; = &amp;#174;</code> -- registered sign = registered trade mark sign, U+00AE ISOnum. */
	public static final char _reg='\u00AE';
	/** <samp>&macr;</samp> <code>&amp;macr; = &amp;#175;</code> -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia. */
	public static final char _macr='\u00AF';
	/** <samp>&deg;</samp> <code>&amp;deg; = &amp;#176;</code> -- degree sign, U+00B0 ISOnum. */
	public static final char _deg='\u00B0';
	/** <samp>&plusmn;</samp> <code>&amp;plusmn; = &amp;#177;</code> -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum. */
	public static final char _plusmn='\u00B1';
	/** <samp>&sup2;</samp> <code>&amp;sup2; = &amp;#178;</code> -- superscript two = superscript digit two = squared, U+00B2 ISOnum. */
	public static final char _sup2='\u00B2';
	/** <samp>&sup3;</samp> <code>&amp;sup3; = &amp;#179;</code> -- superscript three = superscript digit three = cubed, U+00B3 ISOnum. */
	public static final char _sup3='\u00B3';
	/** <samp>&acute;</samp> <code>&amp;acute; = &amp;#180;</code> -- acute accent = spacing acute, U+00B4 ISOdia. */
	public static final char _acute='\u00B4';
	/** <samp>&micro;</samp> <code>&amp;micro; = &amp;#181;</code> -- micro sign, U+00B5 ISOnum. */
	public static final char _micro='\u00B5';
	/** <samp>&para;</samp> <code>&amp;para; = &amp;#182;</code> -- pilcrow sign = paragraph sign, U+00B6 ISOnum. */
	public static final char _para='\u00B6';
	/** <samp>&middot;</samp> <code>&amp;middot; = &amp;#183;</code> -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum. */
	public static final char _middot='\u00B7';
	/** <samp>&cedil;</samp> <code>&amp;cedil; = &amp;#184;</code> -- cedilla = spacing cedilla, U+00B8 ISOdia. */
	public static final char _cedil='\u00B8';
	/** <samp>&sup1;</samp> <code>&amp;sup1; = &amp;#185;</code> -- superscript one = superscript digit one, U+00B9 ISOnum. */
	public static final char _sup1='\u00B9';
	/** <samp>&ordm;</samp> <code>&amp;ordm; = &amp;#186;</code> -- masculine ordinal indicator, U+00BA ISOnum. */
	public static final char _ordm='\u00BA';
	/** <samp>&raquo;</samp> <code>&amp;raquo; = &amp;#187;</code> -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum. */
	public static final char _raquo='\u00BB';
	/** <samp>&frac14;</samp> <code>&amp;frac14; = &amp;#188;</code> -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum. */
	public static final char _frac14='\u00BC';
	/** <samp>&frac12;</samp> <code>&amp;frac12; = &amp;#189;</code> -- vulgar fraction one half = fraction one half, U+00BD ISOnum. */
	public static final char _frac12='\u00BD';
	/** <samp>&frac34;</samp> <code>&amp;frac34; = &amp;#190;</code> -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum. */
	public static final char _frac34='\u00BE';
	/** <samp>&iquest;</samp> <code>&amp;iquest; = &amp;#191;</code> -- inverted question mark = turned question mark, U+00BF ISOnum. */
	public static final char _iquest='\u00BF';
	/** <samp>&Agrave;</samp> <code>&amp;Agrave; = &amp;#192;</code> -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1. */
	public static final char _Agrave='\u00C0';
	/** <samp>&Aacute;</samp> <code>&amp;Aacute; = &amp;#193;</code> -- latin capital letter A with acute, U+00C1 ISOlat1. */
	public static final char _Aacute='\u00C1';
	/** <samp>&Acirc;</samp> <code>&amp;Acirc; = &amp;#194;</code> -- latin capital letter A with circumflex, U+00C2 ISOlat1. */
	public static final char _Acirc='\u00C2';
	/** <samp>&Atilde;</samp> <code>&amp;Atilde; = &amp;#195;</code> -- latin capital letter A with tilde, U+00C3 ISOlat1. */
	public static final char _Atilde='\u00C3';
	/** <samp>&Auml;</samp> <code>&amp;Auml; = &amp;#196;</code> -- latin capital letter A with diaeresis, U+00C4 ISOlat1. */
	public static final char _Auml='\u00C4';
	/** <samp>&Aring;</samp> <code>&amp;Aring; = &amp;#197;</code> -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1. */
	public static final char _Aring='\u00C5';
	/** <samp>&AElig;</samp> <code>&amp;AElig; = &amp;#198;</code> -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1. */
	public static final char _AElig='\u00C6';
	/** <samp>&Ccedil;</samp> <code>&amp;Ccedil; = &amp;#199;</code> -- latin capital letter C with cedilla, U+00C7 ISOlat1. */
	public static final char _Ccedil='\u00C7';
	/** <samp>&Egrave;</samp> <code>&amp;Egrave; = &amp;#200;</code> -- latin capital letter E with grave, U+00C8 ISOlat1. */
	public static final char _Egrave='\u00C8';
	/** <samp>&Eacute;</samp> <code>&amp;Eacute; = &amp;#201;</code> -- latin capital letter E with acute, U+00C9 ISOlat1. */
	public static final char _Eacute='\u00C9';
	/** <samp>&Ecirc;</samp> <code>&amp;Ecirc; = &amp;#202;</code> -- latin capital letter E with circumflex, U+00CA ISOlat1. */
	public static final char _Ecirc='\u00CA';
	/** <samp>&Euml;</samp> <code>&amp;Euml; = &amp;#203;</code> -- latin capital letter E with diaeresis, U+00CB ISOlat1. */
	public static final char _Euml='\u00CB';
	/** <samp>&Igrave;</samp> <code>&amp;Igrave; = &amp;#204;</code> -- latin capital letter I with grave, U+00CC ISOlat1. */
	public static final char _Igrave='\u00CC';
	/** <samp>&Iacute;</samp> <code>&amp;Iacute; = &amp;#205;</code> -- latin capital letter I with acute, U+00CD ISOlat1. */
	public static final char _Iacute='\u00CD';
	/** <samp>&Icirc;</samp> <code>&amp;Icirc; = &amp;#206;</code> -- latin capital letter I with circumflex, U+00CE ISOlat1. */
	public static final char _Icirc='\u00CE';
	/** <samp>&Iuml;</samp> <code>&amp;Iuml; = &amp;#207;</code> -- latin capital letter I with diaeresis, U+00CF ISOlat1. */
	public static final char _Iuml='\u00CF';
	/** <samp>&ETH;</samp> <code>&amp;ETH; = &amp;#208;</code> -- latin capital letter ETH, U+00D0 ISOlat1. */
	public static final char _ETH='\u00D0';
	/** <samp>&Ntilde;</samp> <code>&amp;Ntilde; = &amp;#209;</code> -- latin capital letter N with tilde, U+00D1 ISOlat1. */
	public static final char _Ntilde='\u00D1';
	/** <samp>&Ograve;</samp> <code>&amp;Ograve; = &amp;#210;</code> -- latin capital letter O with grave, U+00D2 ISOlat1. */
	public static final char _Ograve='\u00D2';
	/** <samp>&Oacute;</samp> <code>&amp;Oacute; = &amp;#211;</code> -- latin capital letter O with acute, U+00D3 ISOlat1. */
	public static final char _Oacute='\u00D3';
	/** <samp>&Ocirc;</samp> <code>&amp;Ocirc; = &amp;#212;</code> -- latin capital letter O with circumflex, U+00D4 ISOlat1. */
	public static final char _Ocirc='\u00D4';
	/** <samp>&Otilde;</samp> <code>&amp;Otilde; = &amp;#213;</code> -- latin capital letter O with tilde, U+00D5 ISOlat1. */
	public static final char _Otilde='\u00D5';
	/** <samp>&Ouml;</samp> <code>&amp;Ouml; = &amp;#214;</code> -- latin capital letter O with diaeresis, U+00D6 ISOlat1. */
	public static final char _Ouml='\u00D6';
	/** <samp>&times;</samp> <code>&amp;times; = &amp;#215;</code> -- multiplication sign, U+00D7 ISOnum. */
	public static final char _times='\u00D7';
	/** <samp>&Oslash;</samp> <code>&amp;Oslash; = &amp;#216;</code> -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1. */
	public static final char _Oslash='\u00D8';
	/** <samp>&Ugrave;</samp> <code>&amp;Ugrave; = &amp;#217;</code> -- latin capital letter U with grave, U+00D9 ISOlat1. */
	public static final char _Ugrave='\u00D9';
	/** <samp>&Uacute;</samp> <code>&amp;Uacute; = &amp;#218;</code> -- latin capital letter U with acute, U+00DA ISOlat1. */
	public static final char _Uacute='\u00DA';
	/** <samp>&Ucirc;</samp> <code>&amp;Ucirc; = &amp;#219;</code> -- latin capital letter U with circumflex, U+00DB ISOlat1. */
	public static final char _Ucirc='\u00DB';
	/** <samp>&Uuml;</samp> <code>&amp;Uuml; = &amp;#220;</code> -- latin capital letter U with diaeresis, U+00DC ISOlat1. */
	public static final char _Uuml='\u00DC';
	/** <samp>&Yacute;</samp> <code>&amp;Yacute; = &amp;#221;</code> -- latin capital letter Y with acute, U+00DD ISOlat1. */
	public static final char _Yacute='\u00DD';
	/** <samp>&THORN;</samp> <code>&amp;THORN; = &amp;#222;</code> -- latin capital letter THORN, U+00DE ISOlat1. */
	public static final char _THORN='\u00DE';

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -