📄 source.java

📁 HTML解析器是一个Java库
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
	 * such as {@link HTMLElementName#TBODY TBODY} if they are not present in the source text.
	 * <p>
	 * Elements formed from {@linkplain TagType#isServerTag() server tags} are not included in the hierarchy at all.
	 * <p>
	 * Structural errors in this source document such as overlapping elements are reported in the {@linkplain #getLogger() log}.
	 * When elements are found to overlap, the position of the start tag determines the location of the element in the hierarchy.
	 * <p>
	 * Calling this method on the <code>Source</code> object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically.
	 * <p>
	 * A visual representation of the document element hierarchy can be obtained by calling:<br />
	 * {@link #getSourceFormatter()}<code>.</code>{@link SourceFormatter#setIndentAllElements(boolean) setIndentAllElements(true)}<code>.</code>{@link SourceFormatter#setCollapseWhiteSpace(boolean) setCollapseWhiteSpace(true)}<code>.</code>{@link SourceFormatter#setTidyTags(boolean) setTidyTags(true)}<code>.</code>{@link SourceFormatter#toString() toString()}
	 *
	 * @return a list of the top-level {@linkplain Element elements} in the document element hierarchy, guaranteed not <code>null</code>.
	 * @see Element#getParentElement()
	 * @see Element#getChildElements()
	 * @see Element#getDepth()
	 */
	@Override public List<Element> getChildElements() {
		if (childElements==null) {
			if (length()==0) {
				childElements=Collections.emptyList();
			} else {
				if (allTags==null) fullSequentialParse();
				childElements=new ArrayList<Element>();
				int pos=0;
				while (true) {
					final StartTag childStartTag=source.getNextStartTag(pos);
					if (childStartTag==null) break;
					if (!Config.IncludeServerTagsInElementHierarchy && childStartTag.getTagType().isServerTag()) {
						pos=childStartTag.end;
						continue;
					}
					final Element childElement=childStartTag.getElement();
					childElement.getChildElements(0);
					if (childElement.parentElement==Element.NOT_CACHED) { // make sure element was not added as a child of a descendent element (can happen with overlapping elements)
						childElement.parentElement=null;
						childElements.add(childElement);
					}
					pos=childElement.end;
				}
			}
		}
		return childElements;
	}

	/**
	 * Formats the HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
	 * <p>
	 * The output format can be configured by setting any number of properties on the returned {@link SourceFormatter} instance before
	 * {@linkplain SourceFormatter#writeTo(Writer) obtaining its output}.
	 * <p>
	 * To create a <code>SourceFormatter</code> instance based on a {@link Segment} rather than an entire <code>Source</code> document,
	 * use {@linkplain SourceFormatter#SourceFormatter(Segment) new SourceFormatter(segment)} instead.
	 * 
	 * @return an instance of {@link SourceFormatter} based on this source document.
	 */
	public SourceFormatter getSourceFormatter() {
		return new SourceFormatter(this);
	}

	/**
	 * Returns a list of all {@linkplain Tag tags} in this source document.
	 * <p>
	 * Calling this method on the <code>Source</code> object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically.
	 * <p>
	 * See the {@link Tag} class documentation for more details about the behaviour of this method.
	 *
	 * @return a list of all {@linkplain Tag tags} in this source document.
	 */
	public List<Tag> getAllTags() {
		if (allTags==null) fullSequentialParse();
		return allTags;
	}

	/**
	 * Returns a list of all {@linkplain StartTag start tags} in this source document.
	 * <p>
	 * Calling this method on the <code>Source</code> object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically.
	 * <p>
	 * See the {@link Tag} class documentation for more details about the behaviour of this method.
	 *
	 * @return a list of all {@linkplain StartTag start tags} in this source document.
	 */
	public List<StartTag> getAllStartTags() {
		if (allStartTags==null) {
			final List<Tag> allTags=getAllTags();
			allStartTags=new ArrayList<StartTag>(allTags.size());
			for (Tag tag : allTags) if (tag instanceof StartTag) allStartTags.add((StartTag)tag);
		}
		return allStartTags;
	}

	/**
	 * Returns a list of all {@linkplain Element elements} in this source document.
	 * <p>
	 * Calling this method on the <code>Source</code> object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically.
	 * <p>
	 * The elements returned correspond exactly with the start tags returned in the {@link #getAllStartTags()} method.
	 *
	 * @return a list of all {@linkplain Element elements} in this source document.
	 */
	public List<Element> getAllElements() {
		if (allElements==null) {
			final List<StartTag> allStartTags=getAllStartTags();
			if (allStartTags.isEmpty()) return Collections.emptyList();
			allElements=new ArrayList<Element>(allStartTags.size());
			for (StartTag startTag : allStartTags) allElements.add(startTag.getElement());
		}
		return allElements;
	}

	/**
	 * Returns the {@link Element} with the specified <code>id</code> attribute value.
	 * <p>
	 * This simulates the script method
	 * <code><a target="_blank" href="http://www.w3.org/TR/1998/REC-DOM-Level-1-19981001/level-one-html.html#ID-36113835">getElementById</a></code>
	 * defined in DOM HTML level 1.
	 * <p>
	 * This is equivalent to {@link #getFirstElement(String,String,boolean) getFirstElement}<code>("id",id,true)</code>.
	 * <p>
	 * A well formed HTML document should have no more than one element with any given <code>id</code> attribute value.
	 *
	 * @param id  the <code>id</code> attribute value (case sensitive) to search for, must not be <code>null</code>.
	 * @return the {@link Element} with the specified <code>id</code> attribute value, or <code>null</code> if no such element exists.
	 */
	public Element getElementById(final String id) {
		return getFirstElement(Attribute.ID,id,true);
	}

	/**
	 * Returns the {@link Tag} at the specified position in the source document.
	 * <p>
	 * See the {@link Tag} class documentation for more details about the behaviour of this method.
	 * <p>
	 * This method also returns {@linkplain Tag#isUnregistered() unregistered} tags.
	 *
	 * @param pos  the position in the source document, may be out of bounds.
	 * @return the {@link Tag} at the specified position in the source document, or <code>null</code> if no tag exists at the specified position or it is out of bounds.
	 */
	public final Tag getTagAt(final int pos) {
		return Tag.getTagAt(this,pos,false);
	}

	/**
	 * Returns the {@link Tag} beginning at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document.
	 * <p>
	 * See the {@link Tag} class documentation for more details about the behaviour of this method.
	 *
	 * @param pos  the position in the source document from which to start the search, may be out of bounds.
	 * @return the {@link Tag} beginning at or immediately preceding the specified position in the source document, or <code>null</code> if none exists or the specified position is out of bounds.
	 */
	public Tag getPreviousTag(final int pos) {
		return Tag.getPreviousTag(this,pos);
	}

	/**
	 * Returns the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document.
	 * <p>
	 * See the {@link Tag} class documentation for more details about the behaviour of this method.
	 *
	 * @param pos  the position in the source document from which to start the search, may be out of bounds.
	 * @param tagType  the <code>TagType</code> to search for.
	 * @return the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately preceding the specified position in the source document, or <code>null</code> if none exists or the specified position is out of bounds.
	 */
	public Tag getPreviousTag(final int pos, final TagType tagType) {
		return Tag.getPreviousTag(this,pos,tagType);
	}
	
	/**
	 * Returns the {@link Tag} beginning at or immediately following the specified position in the source document.
	 * <p>
	 * See the {@link Tag} class documentation for more details about the behaviour of this method.
	 * <p>
	 * Use {@link Tag#getNextTag()} to get the tag immediately following another tag.
	 *
	 * @param pos  the position in the source document from which to start the search, may be out of bounds.
	 * @return the {@link Tag} beginning at or immediately following the specified position in the source document, or <code>null</code> if none exists or the specified position is out of bounds.
	 */
	public Tag getNextTag(final int pos) {
		return Tag.getNextTag(this,pos);
	}

	Tag getNextNonServerTag(int pos) {
		while (true) {
			final Tag tag=getNextTag(pos);
			if (tag==null) return null;
			if (!tag.getTagType().isServerTag()) return tag;
			pos=tag.end;
		}
	}

	Tag getPreviousNonServerTag(int pos) {
		while (true) {
			final Tag tag=getPreviousTag(pos-1);
			if (tag==null) return null;
			if (!tag.getTagType().isServerTag()) return tag;
			pos=tag.begin-1;
		}
	}

	/**
	 * Returns the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately following the specified position in the source document.
	 * <p>
	 * See the {@link Tag} class documentation for more details about the behaviour of this method.
	 *
	 * @param pos  the position in the source document from which to start the search, may be out of bounds.
	 * @param tagType  the <code>TagType</code> to search for.
	 * @return the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately following the specified position in the source document, or <code>null</code> if none exists or the specified position is out of bounds.
	 */
	public Tag getNextTag(final int pos, final TagType tagType) {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -