📄 characterreference.java
字号:
* This is equivalent to {@link #parse(CharSequence) parse(characterReferenceText)}<code>.</code>{@link #getCodePoint()},
* except that it returns {@link #INVALID_CODE_POINT} if an invalid character reference is specified instead of throwing a
* <code>NullPointerException</code>.
* <p>
* <dl>
* <dt>Example:</dt>
* <dd><code>CharacterReference.getCodePointFromCharacterReferenceString("&amp;gt;")</code> returns <code>62</code></dd>
* </dl>
*
* @param characterReferenceText the text containing a single encoded character reference.
* @return the unicode code point representing the specified text, or {@link #INVALID_CODE_POINT} if the text does not represent a valid character reference.
*/
public static int getCodePointFromCharacterReferenceString(final CharSequence characterReferenceText) {
    final CharacterReference parsed=parse(characterReferenceText);
    if (parsed==null) {
        // parse(...) produced no character reference, so report the sentinel value instead of dereferencing null
        return INVALID_CODE_POINT;
    }
    return parsed.getCodePoint();
}
/**
 * Determines whether the specified character has to be encoded when it appears in HTML text.
 * <p>
 * A character requires encoding if its unicode code point is greater than U+007F,
 * or if a {@linkplain CharacterEntityReference character entity reference} exists for it.
 * <p>
 * The {@linkplain CharacterEntityReference#_apos apostrophe} (U+0027) is the single exception:
 * it is only reported as requiring encoding while the static {@link Config#IsApostropheEncoded} property
 * is set to <code>true</code>.
 *
 * @param ch the character to test.
 * @return <code>true</code> if the specified character would need to be encoded in HTML text, otherwise <code>false</code>.
 */
public static final boolean requiresEncoding(final char ch) {
    // everything above U+007F is always encoded
    if (ch>127) return true;
    // characters without a named entity reference are written as they are
    if (CharacterEntityReference.getName(ch)==null) return false;
    // a named reference exists; the apostrophe additionally depends on the configuration switch
    return ch!='\'' || Config.IsApostropheEncoded;
}
/**
 * Wraps the specified <code>Writer</code> in a filter <code>Writer</code> that {@linkplain #encode(CharSequence) encodes}
 * every piece of text written to it before handing the result on to the specified <code>Writer</code>.
 *
 * @param writer the destination for the encoded text
 * @return a filter <code>Writer</code> that {@linkplain #encode(CharSequence) encodes} all text before passing it through to the specified <code>Writer</code>.
 * @see #encode(CharSequence unencodedText)
 */
public static Writer getEncodingFilterWriter(final Writer writer) {
    final Writer encodingWriter=new EncodingFilterWriter(writer);
    return encodingWriter;
}
/**
 * A <code>FilterWriter</code> that encodes every character written to it, one character at a time,
 * by means of <code>appendEncode(Appendable,char)</code> before forwarding the result to the wrapped <code>Writer</code>.
 * <p>
 * All bulk <code>write</code> variants funnel into {@link #write(char)}, so each <code>char</code> is encoded independently of its neighbours.
 */
private static final class EncodingFilterWriter extends FilterWriter {
    // Scratch buffer that receives the encoded form of a single character; it is cleared and reused on every write.
    private final StringBuilder sb=new StringBuilder(MAX_ENTITY_REFERENCE_LENGTH);

    /**
     * @param writer the destination that receives the encoded text.
     */
    public EncodingFilterWriter(final Writer writer) {
        super(writer);
    }

    /**
     * Encodes a single character and writes the result to the wrapped writer.
     *
     * @param ch the unencoded character.
     * @throws IOException if the wrapped writer fails.
     */
    public void write(final char ch) throws IOException {
        sb.setLength(0);
        appendEncode(sb,ch);
        if (sb.length()==1) {
            // the character was passed through (or replaced by exactly one character): write it directly
            out.write(sb.charAt(0));
        } else {
            out.append(sb);
        }
    }

    /**
     * Encodes the character held in the low 16 bits of the argument.
     */
    @Override
    public void write(final int chInt) throws IOException {
        write((char)chInt);
    }

    /**
     * Encodes <code>len</code> characters of <code>cbuf</code> starting at index <code>off</code>.
     */
    @Override
    public void write(final char[] cbuf, final int off, final int len) throws IOException {
        final int end=off+len;
        for (int i=off; i<end; i++) {
            write(cbuf[i]);
        }
    }

    /**
     * Encodes <code>len</code> characters of <code>str</code> starting at index <code>off</code>.
     */
    @Override
    public void write(final String str, final int off, final int len) throws IOException {
        final int end=off+len;
        for (int i=off; i<end; i++) {
            write(str.charAt(i));
        }
    }
}
// Appends the encoded form of a single character to the specified Appendable.
// White space formatting is switched off here, so the shared helper is asked to handle every kind of character itself;
// the plain append below only runs if the helper reports that it did not append anything.
private static Appendable appendEncode(final Appendable appendable, char ch) throws IOException {
    final boolean alreadyAppended=appendEncodeCheckForWhiteSpaceFormatting(appendable,ch,false);
    if (!alreadyAppended) {
        return appendable.append(ch);
    }
    return appendable;
}
/**
 * Appends the encoded form of the specified text to the specified <code>Appendable</code>.
 * <p>
 * Every character is first offered to <code>appendEncodeCheckForWhiteSpaceFormatting</code>.
 * Only when <code>whiteSpaceFormatting</code> is <code>true</code> and the character is white space does that helper
 * decline it, in which case the white space is converted to markup here:
 * <ul>
 *  <li>a line break character (with a CR/LF pair counting as one) becomes <code>&lt;br /&gt;</code>
 *  <li>a tab counts as <code>TAB_LENGTH</code> spaces
 *  <li>a run of spaces/tabs is written as alternating <code>&amp;nbsp;</code> and ordinary spaces so that the
 *      original number of spaces is preserved when rendered, always ending in an ordinary space
 * </ul>
 *
 * @param appendable the destination for the encoded text.
 * @param unencodedText the text to encode, may be <code>null</code> in which case nothing is appended.
 * @param whiteSpaceFormatting whether white space is to be converted to markup as described above.
 * @return the specified <code>appendable</code>.
 * @throws IOException if the <code>appendable</code> fails.
 */
static Appendable appendEncode(final Appendable appendable, CharSequence unencodedText, final boolean whiteSpaceFormatting) throws IOException {
    if (unencodedText==null) return appendable;
    int beginPos=0;
    int endPos=unencodedText.length();
    if (unencodedText instanceof Segment) {
        // Read straight from the string behind the segment's source, shifting both bounds by the segment's begin position.
        // this might improve performance slightly
        final Segment segment=(Segment)unencodedText;
        final int segmentOffset=segment.getBegin();
        beginPos=segmentOffset;
        endPos+=segmentOffset;
        unencodedText=segment.source.string;
    }
    for (int i=beginPos; i<endPos; i++) {
        char ch=unencodedText.charAt(i);
        if (appendEncodeCheckForWhiteSpaceFormatting(appendable,ch,whiteSpaceFormatting)) continue;
        // need to process white space
        // whiteSpaceFormatting tries to simulate the formatting characters by converting them to markup
        int nexti=i+1;
        if (ch!=' ' && ch!='\t') {
            // must be line feed, carriage return or form feed, since zero-width space should have been processed as a character reference string
            if (ch=='\r' && nexti<endPos && unencodedText.charAt(nexti)=='\n') i++; // process cr/lf pair as one line break
            appendable.append("<br />"); // add line break
            continue;
        }
        int spaceCount=(ch=='\t') ? TAB_LENGTH : 1;
        // swallow the rest of the run of spaces and tabs, accumulating its width in spaces
        while (nexti<endPos) {
            ch=unencodedText.charAt(nexti);
            if (ch==' ')
                spaceCount+=1;
            else if (ch=='\t')
                spaceCount+=TAB_LENGTH;
            else
                break;
            nexti++;
        }
        if (spaceCount==1) {
            // handle the very common case of a single character to improve efficiency slightly
            // (nexti is still i+1 here, so the loop increment moves to the right position)
            appendable.append(' ');
            continue;
        }
        if (spaceCount%2==1) appendable.append(' '); // first character is a space if we have an odd number of spaces
        while (spaceCount>=2) {
            // use alternating &nbsp; and spaces to keep original number of spaces;
            // each iteration accounts for two spaces, so it must emit two rendered characters
            appendable.append("&nbsp; ");
            spaceCount-=2;
        }
        // note that the last character is never a nbsp, so that word wrapping won't result in a nbsp before the first character in a line
        i=nexti-1; // minus 1 because top level for loop will add it again
    }
    return appendable;
}
/**
 * Appends the encoded form of a single character, unless the character is white space that the caller has to format.
 * <ul>
 *  <li>A character with a character entity reference name is appended as that reference. The apostrophe is the exception:
 *      it is appended as the numeric reference <code>&amp;#39;</code> if {@link Config#IsApostropheEncoded} is set, otherwise unchanged.
 *  <li>Any other character above U+007F is appended as a decimal character reference.
 *  <li>Any remaining character is appended unchanged, except when <code>whiteSpaceFormatting</code> is <code>true</code>
 *      and the character is white space; then nothing is appended.
 * </ul>
 *
 * @param appendable the destination for the encoded character.
 * @param ch the character to encode.
 * @param whiteSpaceFormatting whether white space characters are to be left for the caller to convert to markup.
 * @return <code>true</code> if the character has been appended, <code>false</code> if it is white space that the caller must still process.
 * @throws IOException if the <code>appendable</code> fails.
 */
private static final boolean appendEncodeCheckForWhiteSpaceFormatting(final Appendable appendable, char ch, final boolean whiteSpaceFormatting) throws IOException {
    final String characterEntityReferenceName=CharacterEntityReference.getName(ch);
    if (characterEntityReferenceName!=null) {
        if (ch=='\'') {
            if (Config.IsApostropheEncoded) {
                // Use the numeric reference rather than the named one: &apos; is not defined in HTML 4.
                // Appending a raw apostrophe here would make the configuration switch a no-op.
                appendable.append("&#39;");
            } else {
                appendable.append(ch);
            }
        } else {
            CharacterEntityReference.appendCharacterReferenceString(appendable,characterEntityReferenceName);
        }
    } else if (ch>127) {
        appendDecimalCharacterReferenceString(appendable,ch);
    } else if (!(whiteSpaceFormatting && isWhiteSpace(ch))) {
        appendable.append(ch);
    } else {
        // white space that requires formatting: leave it to the caller
        return false;
    }
    return true;
}
// Convenience overload of getPrevious that searches with the ACCEPT_ALL unterminated character reference settings.
static CharacterReference getPrevious(final Source source, final int pos) {
    final Config.UnterminatedCharacterReferenceSettings acceptAll=Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL;
    return getPrevious(source,pos,acceptAll);
}
// Convenience overload of getNext that searches with the ACCEPT_ALL unterminated character reference settings.
static CharacterReference getNext(final Source source, final int pos) {
    final Config.UnterminatedCharacterReferenceSettings acceptAll=Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL;
    return getNext(source,pos,acceptAll);
}
// Walks backwards through the '&' characters of the source's parse text, starting the search at pos,
// and returns the first position at which construct(...) yields a character reference.
// Returns null once no further '&' is found.
private static CharacterReference getPrevious(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
    final ParseText parseText=source.getParseText();
    for (int ampersandPos=parseText.lastIndexOf('&',pos); ampersandPos!=-1; ampersandPos=parseText.lastIndexOf('&',ampersandPos-1)) {
        final CharacterReference candidate=construct(source,ampersandPos,unterminatedCharacterReferenceSettings);
        if (candidate!=null) {
            return candidate;
        }
    }
    return null;
}
// Walks forwards through the '&' characters of the source's parse text, starting the search at pos,
// and returns the first position at which construct(...) yields a character reference.
// Returns null once no further '&' is found.
private static CharacterReference getNext(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
    final ParseText parseText=source.getParseText();
    for (int ampersandPos=parseText.indexOf('&',pos); ampersandPos!=-1; ampersandPos=parseText.indexOf('&',ampersandPos+1)) {
        final CharacterReference candidate=construct(source,ampersandPos,unterminatedCharacterReferenceSettings);
        if (candidate!=null) {
            return candidate;
        }
    }
    return null;
}
// Appends "&#x", the code point in lower case base 16 as produced by Integer.toString(codePoint,16), and ';'.
// Returns whatever the final append call returns.
static final Appendable appendHexadecimalCharacterReferenceString(final Appendable appendable, final int codePoint) throws IOException {
    final String hexDigits=Integer.toString(codePoint,16);
    final Appendable afterPrefix=appendable.append("&#x");
    final Appendable afterDigits=afterPrefix.append(hexDigits);
    return afterDigits.append(';');
}
// Appends "&#", the code point in base 10 as produced by Integer.toString(codePoint), and ';'.
// Returns whatever the final append call returns.
static final Appendable appendDecimalCharacterReferenceString(final Appendable appendable, final int codePoint) throws IOException {
    final String decimalDigits=Integer.toString(codePoint);
    final Appendable afterPrefix=appendable.append("&#");
    final Appendable afterDigits=afterPrefix.append(decimalDigits);
    return afterDigits.append(';');
}
// Attempts to build a character reference that starts at position begin of the source.
// Returns null if the character at begin is not '&'. Otherwise delegates to NumericCharacterReference.construct
// when the following character is '#', and to CharacterEntityReference.construct in all other cases.
// An IndexOutOfBoundsException raised anywhere during the attempt (for example when begin or begin+1 lies
// outside the parse text) is treated as "no character reference here" and also results in null.
private static CharacterReference construct(final Source source, final int begin, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
    try {
        final ParseText parseText=source.getParseText();
        if (parseText.charAt(begin)!='&') {
            return null;
        }
        if (parseText.charAt(begin+1)=='#') {
            return NumericCharacterReference.construct(source,begin,unterminatedCharacterReferenceSettings);
        }
        return CharacterEntityReference.construct(source,begin,unterminatedCharacterReferenceSettings.characterEntityReferenceMaxCodePoint);
    } catch (IndexOutOfBoundsException ex) {
        return null;
    }
}
// Appends the specified text to the Appendable with its character references replaced by the characters they represent.
// The search for character references starts at pos, but the text is copied from index 0 onwards.
// The unterminated character reference settings are taken from the current compatibility mode, depending on insideAttributeValue.
// If convertNonBreakingSpaces is true, a reference whose character is CharacterEntityReference._nbsp is appended as an ordinary space.
// Returns the specified appendable.
private static Appendable appendDecode(final Appendable appendable, final CharSequence encodedText, int pos, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) throws IOException {
    final Config.UnterminatedCharacterReferenceSettings settings=Config.CurrentCompatibilityMode.getUnterminatedCharacterReferenceSettings(insideAttributeValue);
    final Source source=new Source(encodedText);
    int copiedUpTo=0; // everything before this index has already been written to the appendable
    CharacterReference reference=getNext(source,pos,settings);
    while (reference!=null) {
        final int referenceBegin=reference.getBegin();
        if (copiedUpTo!=referenceBegin) {
            // copy the literal text between the previous reference and this one
            appendable.append(encodedText,copiedUpTo,referenceBegin);
        }
        final boolean isNonBreakingSpace=reference.getChar()==CharacterEntityReference._nbsp;
        if (isNonBreakingSpace && convertNonBreakingSpaces) {
            appendable.append(' ');
        } else {
            reference.appendTo(appendable);
        }
        copiedUpTo=reference.getEnd();
        // continue searching directly after the reference just handled
        reference=getNext(source,copiedUpTo,settings);
    }
    final int textLength=encodedText.length();
    if (copiedUpTo!=textLength) {
        // copy the literal text after the last reference
        appendable.append(encodedText,copiedUpTo,textLength);
    }
    return appendable;
}
// Appends the character(s) of this character reference to the specified Appendable:
// the result of getChar() for a code point that is not supplementary, otherwise the high and low surrogate of the code point.
private void appendTo(Appendable appendable) throws IOException {
    if (!Character.isSupplementaryCodePoint(codePoint)) {
        appendable.append(getChar());
        return;
    }
    final char highSurrogate=getHighSurrogate(codePoint);
    appendable.append(highSurrogate);
    final char lowSurrogate=getLowSurrogate(codePoint);
    appendable.append(lowSurrogate);
}
// pinched from http://svn.apache.org/repos/asf/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java
// Computes the first (high) char of the surrogate pair for the specified code point.
private static char getHighSurrogate(int codePoint) {
    // 0xD800 is the first high surrogate and corresponds to code point 0x10000, hence the offset of (0x10000 >> 10)
    final int surrogateBase=0xD800 - (0x10000 >> 10);
    final int upperBits=codePoint >> 10;
    return (char)(surrogateBase + upperBits);
}
// Computes the second (low) char of the surrogate pair for the specified code point:
// 0xDC00 plus the lowest ten bits of the code point.
private static char getLowSurrogate(int codePoint) {
    final int lowestTenBits=codePoint & 0x3FF;
    return (char)(0xDC00 + lowestTenBits);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -