📄 subcache.java

📁 HTML解析器是一个Java库
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.0
// Copyright (C) 2007 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;

/**
 * Represents a cached map of character positions to tags for a particular tag type,
 * or for all tag types if the tagType field is null.
 */
final class SubCache {
	// Length given to the backing array when a SubCache is first created.
	private static final int INITIAL_CAPACITY=64;

	// Owning cache; consulted for the source length and for uncached tag lookups.
	private final Cache cache;
	// Tag type held by this sub-cache, or null when it covers all tag types.
	// Unregistered tag types are not supported at present.
	public final TagType tagType;
	// Marker entry for the beginning of the file (always stored in array[0]).
	private final CacheEntry bof;
	// Marker entry for the end of the file (stored in array[eof.index]).
	private final CacheEntry eof;
	// Backing storage for the entries, bof first and eof last.
	private CacheEntry[] array=new CacheEntry[INITIAL_CAPACITY];

	/**
	 * Creates a sub-cache that initially holds only the two marker entries.
	 *
	 * @param cache  the owning cache, used here to obtain the source length.
	 * @param tagType  the tag type this sub-cache is dedicated to, or null for all tag types.
	 */
	public SubCache(final Cache cache, final TagType tagType) {
		this.cache=cache;
		this.tagType=tagType;
		// Beginning-of-file marker: index 0, position -1 (one before the first character).
		bof=new CacheEntry(0,-1,null,false,false);
		array[0]=bof;
		// End-of-file marker: index 1, position equal to the source length.
		eof=new CacheEntry(1,cache.getSourceLength(),null,false,false);
		array[1]=eof;
	}

	/**
	 * Returns the number of entries currently in use, counting the bof and eof markers.
	 *
	 * @return the index of the eof marker plus one.
	 */
	public int size() {
		final int eofIndex=eof.index;
		return eofIndex+1;
	}

	/**
	 * Resets this sub-cache so that the eof marker directly follows the bof marker.
	 * The backing array is kept; slots beyond index 1 are not nulled out.
	 */
	public void clear() {
		// Put the eof marker back into the slot right after bof.
		eof.index=1;
		array[1]=eof;
		// Reset the flags on the sides of the markers that face each other.
		bof.nextCached=false;
		eof.previousCached=false;
	}

	/**
	 * Prepares this sub-cache for a bulk load of exactly <code>tagCount</code> tags.
	 * A new backing array with room for the tags plus the two markers replaces the old one,
	 * and both markers are flagged as cached towards each other.
	 * The tag slots themselves are expected to be filled via {@link #bulkLoad_Set(int,Tag)}.
	 *
	 * @param tagCount  the number of tags that will be loaded.
	 */
	public void bulkLoad_Init(final int tagCount) {
		final int eofIndex=tagCount+1; // slot 0 is bof, slots 1..tagCount are tags
		array=new CacheEntry[eofIndex+1];
		array[0]=bof;
		bof.nextCached=true;
		eof.index=eofIndex;
		array[eofIndex]=eof;
		eof.previousCached=true;
	}

	/**
	 * Stores a tag during a bulk load.
	 *
	 * @param tagsIndex  zero-based index of the tag; it is stored one slot higher because slot 0 holds the bof marker.
	 * @param tag  the tag to store; its <code>begin</code> value becomes the entry position.
	 */
	public void bulkLoad_Set(final int tagsIndex, final Tag tag) {
		final int slot=tagsIndex+1;
		// Both neighbour flags are set to true for bulk-loaded entries.
		final CacheEntry entry=new CacheEntry(slot,tag.begin,tag,true,true);
		array[slot]=entry;
	}

	/**
	 * Appends a tag to this sub-cache during a bulk load.
	 * The new entry takes the slot currently indexed by the eof marker and the eof index is advanced by one;
	 * the eof marker itself is only written back by {@link #bulkLoad_FinaliseTypeSpecificCache()}.
	 *
	 * @param tag  the tag to append; its <code>begin</code> value becomes the entry position.
	 */
	public void bulkLoad_AddToTypeSpecificCache(final Tag tag) {
		final int slot=eof.index;
		// Grow first if there would be no free slot left after this one.
		final boolean lastSlot=(slot+1==array.length);
		if (lastSlot) doubleCapacity();
		final CacheEntry entry=new CacheEntry(slot,tag.begin,tag,true,true);
		array[slot]=entry;
		eof.index++;
	}

	/**
	 * Completes a bulk load performed with {@link #bulkLoad_AddToTypeSpecificCache(Tag)}:
	 * writes the eof marker into its final slot and flags both markers as cached towards each other.
	 */
	public void bulkLoad_FinaliseTypeSpecificCache() {
		array[eof.index]=eof;
		eof.previousCached=true;
		bof.nextCached=true;
	}

	/**
	 * Returns the tag recorded at exactly the given position, searching the source if the position has not been cached.
	 * <p>
	 * This must only be called on allTagTypesSubCache (ie tagType==null).
	 *
	 * @param pos  the position in the source to look at.
	 * @param serverTagOnly  if true, only a tag whose tag type is a server tag is returned.
	 * @return the tag at <code>pos</code>, or null if <code>pos</code> is outside the source,
	 *         the cache records no tag there, or the tag does not satisfy <code>serverTagOnly</code>.
	 */
	public Tag getTagAt(final int pos, final boolean serverTagOnly) {
		// An empty source is covered by the range check: no pos satisfies 0 <= pos < 0.
		final int sourceLength=cache.getSourceLength();
		if (pos<0 || pos>=sourceLength) return null;
		final CacheEntry cacheEntry=array[getIndexOfPos(pos)];
		if (cacheEntry.pos==pos) {
			final Tag cachedTag=cacheEntry.tag;
			// Entries may carry a null tag (other methods of this class guard against it),
			// so it must not be dereferenced when filtering for server tags.
			if (cachedTag==null) return null;
			if (serverTagOnly && !cachedTag.getTagType().isServerTag()) return null;
			return cachedTag;
		}
		// The following entry is flagged as cached on its previous side, so no search is made.
		if (cacheEntry.previousCached) return null;
		return cache.addTagAt(pos,serverTagOnly);
	}

	/**
	 * Inserts a new entry for the given position between its neighbouring entries.
	 *
	 * @param pos  the source position the entry describes.
	 * @param tag  the tag at that position, or null to record that no tag of interest is there.
	 */
	public void addTagAt(final int pos, final Tag tag) {
		final int insertIndex=getIndexOfPos(pos);
		final CacheEntry following=array[insertIndex];
		final CacheEntry preceding=getPrevious(following);
		// A neighbour flag is set only when that neighbour sits on the directly adjacent position.
		final boolean touchesPreceding=(pos==preceding.pos+1);
		final boolean touchesFollowing=(pos==following.pos-1);
		final CacheEntry inserted=new CacheEntry(insertIndex,pos,tag,touchesPreceding,touchesFollowing);
		add(preceding,inserted,following);
	}

	/**
	 * Returns the closest searchable tag beginning at or before the given position.
	 * <p>
	 * Note that this method never returns tags for which tag.includeInSearch() is false,
	 * so separate caching of unregistered tags won't work.
	 *
	 * @param pos  the position in the source from which to search backwards.
	 * @return the tag found, or null if the source is empty, <code>pos</code> is out of range, or no tag is found.
	 */
	public Tag getPreviousTag(final int pos) {
		if (cache.getSourceLength()==0 || pos<0 || pos>=cache.getSourceLength()) return null;
		final CacheEntry entryAtOrAfter=array[getIndexOfPos(pos)];
		if (entryAtOrAfter.pos==pos) {
			// Exact hit: usable straight away if it carries a searchable tag.
			final Tag cachedTag=entryAtOrAfter.tag;
			if (cachedTag!=null && cachedTag.includeInSearch()) return cachedTag;
		}
		final Tag found=getPreviousTag(getPrevious(entryAtOrAfter),pos,entryAtOrAfter);
		// Record the outcome of the search (including a null result) in the cache.
		addPreviousTag(pos,found);
		return found;
	}

	/**
	 * Returns the closest searchable tag beginning at or after the given position.
	 * <p>
	 * Note that this method never returns tags for which tag.includeInSearch() is false,
	 * so separate caching of unregistered tags won't work.
	 *
	 * @param pos  the position in the source from which to search forwards.
	 * @return the tag found, or null if the source is empty, <code>pos</code> is out of range, or no tag is found.
	 */
	public Tag getNextTag(final int pos) {
		if (cache.getSourceLength()==0 || pos<0 || pos>=cache.getSourceLength()) return null;
		final CacheEntry entryAtOrAfter=array[getIndexOfPos(pos)];
		final Tag found;
		if (entryAtOrAfter.pos!=pos) {
			// No entry exactly at pos: search the gap between the surrounding entries.
			found=getNextTag(getPrevious(entryAtOrAfter),pos,entryAtOrAfter);
		} else {
			// Exact hit: usable straight away if it carries a searchable tag.
			final Tag cachedTag=entryAtOrAfter.tag;
			if (cachedTag!=null && cachedTag.includeInSearch()) return cachedTag;
			found=getNextTag(entryAtOrAfter,pos,getNext(entryAtOrAfter));
		}
		// Record the outcome of the search (including a null result) in the cache.
		addNextTag(pos,found);
		return found;
	}

	/**
	 * Creates an iterator over the tags of this sub-cache.
	 *
	 * @return a new <code>TagIterator</code> instance.
	 */
	public Iterator<Tag> getTagIterator() {
		final Iterator<Tag> tagIterator=new TagIterator();
		return tagIterator;
	}

	/**
	 * Returns a textual dump of this sub-cache as produced by {@link #appendTo(StringBuilder)}.
	 *
	 * @return the tag type line followed by one line per entry.
	 */
	public String toString() {
		final StringBuilder sb=new StringBuilder();
		appendTo(sb);
		return sb.toString();
	}

	/**
	 * Appends a textual dump of this sub-cache to the given builder:
	 * a heading naming the tag type, then every entry up to and including <code>lastIndex()</code>,
	 * each terminated by <code>Config.NewLine</code>.
	 *
	 * @param sb  the builder to append to.
	 * @return the same builder, to allow chaining.
	 */
	protected StringBuilder appendTo(final StringBuilder sb) {
		sb.append("Cache for TagType : ");
		sb.append(tagType);
		sb.append(Config.NewLine);
		int i=0;
		while (i<=lastIndex()) {
			sb.append(array[i]);
			sb.append(Config.NewLine);
			i++;
		}
		return sb;
	}

	/**
	 * Searches backwards for a searchable tag, walking one cache gap at a time towards the bof marker.
	 * On entry and on every pass: previousCacheEntry.pos &lt; pos &lt;= nextCacheEntry.pos
	 *
	 * @param previousCacheEntry  the entry bounding the current gap on the lower side.
	 * @param pos  the position from which to search backwards within the current gap.
	 * @param nextCacheEntry  the entry bounding the current gap on the upper side.
	 * @return the tag found, or null once the bof marker is reached without finding one.
	 */
	private Tag getPreviousTag(CacheEntry previousCacheEntry, int pos, CacheEntry nextCacheEntry) {
		for (;;) {
			if (!nextCacheEntry.previousCached) {
				// The gap is not flagged as cached, so search the source itself.
				// If useAllTypesCache is true, this automatically adds the tag to all caches if one is found,
				// and maybe some unregistered tags along the way.
				final Tag found=Tag.getPreviousTagUncached(cache.source,pos,tagType,previousCacheEntry.pos);
				if (found!=null) {
					// The tag has to be added manually if useAllTypesCache is false.
					if (!cache.source.useAllTypesCache) addTagAt(found.begin,found);
					return found;
				}
			}
			if (previousCacheEntry==bof) return null;
			final Tag boundaryTag=previousCacheEntry.tag;
			if (boundaryTag!=null && boundaryTag.includeInSearch()) return boundaryTag;
			// Nothing usable on the lower boundary either: step to the gap before it.
			pos=previousCacheEntry.pos-1;
			nextCacheEntry=previousCacheEntry;
			previousCacheEntry=getPrevious(nextCacheEntry);
		}
	}

	/**
	 * Searches forwards for a searchable tag, walking one cache gap at a time towards the eof marker.
	 * On entry and on every pass: previousCacheEntry.pos &lt;= pos &lt; nextCacheEntry.pos
	 *
	 * @param previousCacheEntry  the entry bounding the current gap on the lower side.
	 * @param pos  the position from which to search forwards within the current gap.
	 * @param nextCacheEntry  the entry bounding the current gap on the upper side.
	 * @return the tag found, or null once the eof marker is reached without finding one.
	 */
	private Tag getNextTag(CacheEntry previousCacheEntry, int pos, CacheEntry nextCacheEntry) {
		for (;;) {
			if (!previousCacheEntry.nextCached) {
				// The gap is not flagged as cached, so search the source itself.
				// If useAllTypesCache is true, this automatically adds the tag to caches if one is found,
				// and maybe some unregistered tags along the way.
				final Tag found=Tag.getNextTagUncached(cache.source,pos,tagType,nextCacheEntry.pos);
				if (found!=null) {
					// The tag has to be added manually if useAllTypesCache is false.
					if (!cache.source.useAllTypesCache) addTagAt(found.begin,found);
					return found;
				}
			}
			if (nextCacheEntry==eof) return null;
			final Tag boundaryTag=nextCacheEntry.tag;
			if (boundaryTag!=null && boundaryTag.includeInSearch()) return boundaryTag;
			// Nothing usable on the upper boundary either: step to the gap after it.
			pos=nextCacheEntry.pos+1;
			previousCacheEntry=nextCacheEntry;
			nextCacheEntry=getNext(previousCacheEntry);
		}
	}

	private void addPreviousTag(final int pos, final Tag tag) {
		final int tagPos=(tag==null) ? bof.pos : tag.begin;
		if (tagPos==pos) return; // the tag was found exactly on pos, so cache has already been fully updated
		// tagPos < pos
		int index=getIndexOfPos(pos);
		CacheEntry stepCacheEntry=array[index];
		// stepCacheEntry.pos is either == or > than tagPos.
		// stepCacheEntry.pos is either == or > pos.
		int compactStartIndex=Integer.MAX_VALUE;
		if (stepCacheEntry.pos==pos) {
			// a cache entry was already at pos (containing null or wrong tagType)
			stepCacheEntry.previousCached=true;
			if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);}
		} else if (!stepCacheEntry.previousCached) {
			// we have to add a new cacheEntry at pos:
			if (tagType==null)
				cache.addTagAt(pos,false); // this pos has never been checked before, so add it to all relevant SubCaches (a null or unregistered tag entry is always added to this SubCache)
			else
				addTagAt(pos,null); // all we know is that the pos doesn't contain a tag of this SubCache's type, so add a null entry to this SubCache only.
			// now we have to reload the index and stepCacheEntry as they may have changed:
			stepCacheEntry=array[index=getIndexOfPos(pos)];
			// stepCacheEntry.pos is either == or > than tagPos.
			// stepCacheEntry.pos is either == or > pos. (the latter if the added entry was redundant)
			if (stepCacheEntry.pos==pos) {
				// perform same steps as in the (stepCacheEntry.pos==pos) if condition above:
				stepCacheEntry.previousCached=true;
				if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);}
			}
		}
		while (true) {
			stepCacheEntry=array[--index];
			if (stepCacheEntry.pos<=tagPos) break;
			if (stepCacheEntry.tag!=null) {
				if (stepCacheEntry.tag.includeInSearch()) throw new SourceCacheEntryMissingInternalError(tagType,tag,this);
				stepCacheEntry.previousCached=true;
				stepCacheEntry.nextCached=true;
			} else {
				stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -