📄 outputdocument.java

📁 HTML解析器是一个Java库
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
	 * The use of <code>LinkedHashMap</code> to implement the map ensures (probably unnecessarily) that
	 * existing attributes are output in the same order as they appear in the source document, and new
	 * attributes are output in the same order as they are added.
	 * <p>
	 * <dl>
	 *  <dt>Example:</dt>
	 *  <dd><pre>
	 *  Source source=new Source(htmlDocument);
	 *  Attributes bodyAttributes
	 *    =source.getNextStartTag(0,HTMLElementName.BODY).getAttributes();
	 *  OutputDocument outputDocument=new OutputDocument(source);
	 *  Map&lt;String,String&gt; attributesMap=outputDocument.replace(bodyAttributes,true);
	 *  attributesMap.put("bgcolor","green");
	 *  String htmlDocumentWithGreenBackground=outputDocument.toString();</pre></dl>
	 *
	 * @param attributes  the <code>Attributes</code> segment defining the span of the segment and initial name/value entries of the returned map.
	 * @param convertNamesToLowerCase  specifies whether all attribute names are converted to lower case in the map.
	 * @return a <code>Map</code> containing the name/value entries to be output.
	 * @see #replace(Attributes,Map)
	 */
	public Map<String,String> replace(final Attributes attributes, boolean convertNamesToLowerCase) {
		// Create the segment covering the attributes span, register it for output,
		// and hand the segment's own map back to the caller for editing.
		final AttributesOutputSegment segment=new AttributesOutputSegment(attributes,convertNamesToLowerCase);
		register(segment);
		final Map<String,String> attributesMap=segment.getMap();
		return attributesMap;
	}

	/**
	 * Replaces the specified attributes segment in this source document with the name/value entries of the specified <code>Map</code>.
	 * <p>
	 * Choose this method when the <code>Map</code> holding the new attribute values should not start out
	 * with the same entries as the source attributes, or when a map implementation other than
	 * <code>LinkedHashMap</code> is needed.
	 * In all other cases the {@link #replace(Attributes, boolean convertNamesToLowerCase)} method is generally more useful.
	 * <p>
	 * A map entry with a <code>null</code> value represents an attribute that has no value.
	 * <p>
	 * The map stores attribute values unencoded; they are automatically
	 * {@linkplain CharacterReference#encode(CharSequence) encoded} if necessary during output.
	 * <p>
	 * Behaviour is unspecified if attribute names contain invalid characters.
	 * <p>
	 * Note that the <code>Map</code> treats attribute names as case sensitive,
	 * whereas methods in the <code>Attributes</code> class treat them as case insensitive.
	 *
	 * @param attributes  the <code>Attributes</code> object defining the span of the segment to replace.
	 * @param map  the <code>Map</code> containing the name/value entries.
	 * @see #replace(Attributes, boolean convertNamesToLowerCase)
	 */
	public void replace(final Attributes attributes, final Map<String,String> map) {
		// The caller-supplied map is passed straight to the segment; registration makes it part of the output.
		final AttributesOutputSegment segment=new AttributesOutputSegment(attributes,map);
		register(segment);
	}

	/**
	 * Replaces the specified segment of this output document with a string of spaces of the same length.
	 * <p>
	 * The typical use is to blank out parts of the document while keeping the character positions of all remaining elements unchanged.
	 * <p>
	 * The functionality available through the {@link Segment#ignoreWhenParsing()} method is implemented internally using this method.
	 * <p>
	 * Use the {@link #remove(Segment)} method instead if the segment should be removed from the output document completely.
	 *
	 * @param begin  the character position at which to begin the replacement.
	 * @param end  the character position at which to end the replacement.
	 */
	public void replaceWithSpaces(final int begin, final int end) {
		final BlankOutputSegment blankSegment=new BlankOutputSegment(begin,end);
		register(blankSegment);
	}

	/**
	 * Registers the specified {@linkplain OutputSegment output segment} in this output document.
	 * <p>
	 * Call this method directly when a customised {@link OutputSegment} class is to be used.
	 *
	 * @param outputSegment  the output segment to register.
	 */
	public void register(final OutputSegment outputSegment) {
		// Segments are only collected here; no ordering is applied at registration time.
		this.outputSegments.add(outputSegment);
	}

	/**
	 * Writes the final content of this output document to the specified <code>Writer</code>.
	 * <p>
	 * To output only a portion of the output document, use the {@link #writeTo(Writer, int begin, int end)} method.
	 * <p>
	 * If the output is required in the form of a <code>Reader</code>, use {@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)} instead.
	 * <p>
	 * The writer is flushed before this method returns, whether or not the output was generated successfully.
	 *
	 * @param writer  the destination <code>java.io.Writer</code> for the output.
	 * @throws IOException if an I/O exception occurs.
	 * @see #toString()
	 */
	public void writeTo(final Writer writer) throws IOException {
		try {
			this.appendTo(writer);
		}
		finally {
			// Runs on both the normal and the exceptional path.
			writer.flush();
		}
	}
	
	/**
	 * Writes the specified portion of the final content of this output document to the specified <code>Writer</code>.
	 * <p>
	 * Zero-length output segments positioned exactly at <code>begin</code> or <code>end</code> are included in the output.
	 * <p>
	 * The writer is flushed before this method returns, whether or not the output was generated successfully.
	 *
	 * @param writer  the destination <code>java.io.Writer</code> for the output.
	 * @param begin  the character position at which to start the output, inclusive.
	 * @param end  the character position at which to end the output, exclusive.
	 * @throws IOException if an I/O exception occurs.
	 * @see #writeTo(Writer)
	 */
	public void writeTo(final Writer writer, final int begin, final int end) throws IOException {
		try {
			this.appendTo(writer,begin,end);
		}
		finally {
			// Runs on both the normal and the exceptional path.
			writer.flush();
		}
	}

	/**
	 * Appends the final content of this output document to the specified <code>Appendable</code> object.
	 * <p>
	 * This covers the whole document, from position 0 up to the length of the source text.
	 * To output only a portion of the output document, use the {@link #appendTo(Appendable, int begin, int end)} method.
	 *
	 * @param appendable  the destination <code>java.lang.Appendable</code> object for the output.
	 * @throws IOException if an I/O exception occurs.
	 * @see #toString()
	 */
	public void appendTo(final Appendable appendable) throws IOException {
		final int sourceLength=sourceText.length();
		appendTo(appendable,0,sourceLength);
	}

	/**
	 * Appends the specified portion of the final content of this output document to the specified <code>Appendable</code> object.
	 * <p>
	 * Any zero-length output segments located at <code>begin</code> or <code>end</code> are included in the output.
	 * <p>
	 * If no output segments are registered, the requested range of the source text is appended unchanged.
	 * Otherwise the registered output segments are first sorted in place using <code>OutputSegment.COMPARATOR</code>,
	 * and the output is assembled by alternating between unmodified source text and the output of each relevant segment.
	 * <p>
	 * Note that a relevant segment other than an overlapping blank segment is output by calling its own
	 * <code>appendTo(Appendable)</code> method, which is not given the <code>begin</code> and <code>end</code> limits.
	 *
	 * @param appendable  the destination <code>java.lang.Appendable</code> object for the output.
	 * @param begin  the character position at which to start the output, inclusive.
	 * @param end  the character position at which to end the output, exclusive.
	 * @throws IOException if an I/O exception occurs.
	 * @see #appendTo(Appendable)
	 */
	public void appendTo(final Appendable appendable, final int begin, final int end) throws IOException {
		// Fast path: nothing registered, so the output is just the requested slice of the source text.
		if (outputSegments.isEmpty()) {
			appendable.append(sourceText,begin,end);
			return;
		}
		// pos is the source position up to which output has already been produced.
		int pos=begin;
		// Sorting happens on every call and reorders the registered list itself.
		Collections.sort(outputSegments,OutputSegment.COMPARATOR);
		for (OutputSegment outputSegment : outputSegments) {
			if (outputSegment.getEnd()<pos) continue; // skip output segments before begin, and any that are enclosed by other output segments
			if (outputSegment.getEnd()==pos && outputSegment.getBegin()<pos) continue; // skip output segments that end at pos unless they are zero length
			if (outputSegment.getBegin()>end) break; // stop processing output segments if they are no longer in the desired output range
			if (outputSegment.getBegin()==end && outputSegment.getEnd()>end) break; // stop processing output segments if they start at end unless they are zero length
			// Copy the untouched source text lying between the current position and the start of this segment.
			if (outputSegment.getBegin()>pos) {
				appendable.append(sourceText,pos,outputSegment.getBegin());
			}
			if (outputSegment.getBegin()<pos && outputSegment instanceof BlankOutputSegment) {
				// Overlapping BlankOutputSegments requires special handling to ensure the correct number of blanks are inserted.
				// Only the part of the blank segment beyond pos is emitted, one space per remaining position, advancing pos as it goes.
				for (final int outputSegmentEnd=outputSegment.getEnd(); pos<outputSegmentEnd; pos++) appendable.append(' ');
			} else {
				// Let the segment produce its own output, then continue from the end of the span it covers.
				outputSegment.appendTo(appendable);
				pos=outputSegment.getEnd();
			}
		}
		// Copy any source text remaining between the last processed segment and the end of the requested range.
		if (pos<end) appendable.append(sourceText,pos,end);
	}

	// Documentation inherited from CharStreamSource
	// The estimate starts at the length of the source text and is adjusted, for every registered segment,
	// by the difference between that segment's own estimate and the length of the source span it covers.
	public long getEstimatedMaximumOutputLength() {
		long total=sourceText.length();
		for (final OutputSegment segment : outputSegments) {
			final int coveredLength=segment.getEnd()-segment.getBegin();
			total+=(segment.getEstimatedMaximumOutputLength()-coveredLength);
		}
		// A negative total is reported as -1.
		if (total<0L) return -1L;
		return total;
	}

	/**
	 * Returns the final content of this output document as a <code>String</code>.
	 * <p>
	 * The conversion is delegated to <code>CharStreamSourceUtil.toString(this)</code>.
	 * Use {@link #getDebugInfo()} instead for a representation intended for debugging.
	 *
	 * @return the final content of this output document as a <code>String</code>.
	 * @see #writeTo(Writer)
	 */
	@Override
	public String toString() {
		return CharStreamSourceUtil.toString(this);
	}

	/**
	 * Returns a string representation of this object useful for debugging purposes.
	 * <p>
	 * One line is produced for each of the {@link #getRegisteredOutputSegments() registered output segments},
	 * consisting of the kind of operation, the span it covers, and at most the first 20 characters of the segment's string form.
	 * The span is shown as row/column vectors if the source text is a {@link Source}, otherwise as plain character positions.
	 *
	 * @return a string representation of this object useful for debugging purposes.
	 */
	public String getDebugInfo() {
		final StringBuilder debugInfo=new StringBuilder();
		for (final OutputSegment segment : getRegisteredOutputSegments()) {
			// Label describing the kind of operation the segment performs.
			final String label;
			if (segment instanceof BlankOutputSegment) {
				label="Replace with Spaces: ";
			} else if (segment instanceof RemoveOutputSegment) {
				label="Remove: ";
			} else {
				label="Replace: ";
			}
			debugInfo.append(label);
			// Span covered by the segment.
			if (sourceText instanceof Source) {
				final Source source=(Source)sourceText;
				debugInfo.append('(');
				source.getRowColumnVector(segment.getBegin()).appendTo(debugInfo);
				debugInfo.append('-');
				source.getRowColumnVector(segment.getEnd()).appendTo(debugInfo);
				debugInfo.append(')');
			} else {
				debugInfo.append("(p").append(segment.getBegin()).append("-p").append(segment.getEnd()).append(')');
			}
			debugInfo.append(' ');
			// Preview of the segment's string form, truncated with an ellipsis beyond 20 characters.
			final String preview=segment.toString();
			if (preview.length()>20) {
				debugInfo.append(preview.substring(0,20)).append("...");
			} else {
				debugInfo.append(preview);
			}
			debugInfo.append(Config.NewLine);
		}
		return debugInfo.toString();
	}

	/**
	 * Returns a list of all the {@linkplain #register(OutputSegment) registered} {@link OutputSegment} objects in this output document.
	 * <p>
	 * The output segments are sorted in order of their {@linkplain OutputSegment#getBegin() starting position} in the document.
	 * The sort is applied to the internal list each time this method is called.
	 * <p>
	 * The returned list is modifiable and any changes will affect the output generated by this <code>OutputDocument</code>.
	 *
	 * @return a list of all the {@linkplain #register(OutputSegment) registered} {@link OutputSegment} objects in this output document.
	 */
	public List<OutputSegment> getRegisteredOutputSegments() {
		// The internal list itself is returned, not a copy.
		final List<OutputSegment> segments=outputSegments;
		Collections.sort(segments,OutputSegment.COMPARATOR);
		return segments;
	}
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -