sax2dtm.java

来自「JAVA 所有包」· Java 代码 · 共 2,129 行 · 第 1/5 页

JAVA
2,129
字号
/* * Copyright 1999-2005 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: SAX2DTM.java,v 1.2.4.1 2005/09/15 08:15:11 suresh_emailid Exp $ */package com.sun.org.apache.xml.internal.dtm.ref.sax2dtm;import java.util.Hashtable;import java.util.Vector;import javax.xml.transform.Source;import javax.xml.transform.SourceLocator;import com.sun.org.apache.xml.internal.dtm.*;import com.sun.org.apache.xml.internal.dtm.ref.*;import com.sun.org.apache.xml.internal.utils.StringVector;import com.sun.org.apache.xml.internal.utils.IntVector;import com.sun.org.apache.xml.internal.utils.FastStringBuffer;import com.sun.org.apache.xml.internal.utils.IntStack;import com.sun.org.apache.xml.internal.utils.SuballocatedIntVector;import com.sun.org.apache.xml.internal.utils.SystemIDResolver;import com.sun.org.apache.xml.internal.utils.WrappedRuntimeException;import com.sun.org.apache.xml.internal.utils.XMLString;import com.sun.org.apache.xml.internal.utils.XMLStringFactory;import com.sun.org.apache.xml.internal.res.XMLErrorResources;import com.sun.org.apache.xml.internal.res.XMLMessages;import org.xml.sax.*;import org.xml.sax.ext.*;/** * This class implements a DTM that tends to be optimized more for speed than * for compactness, that is constructed via SAX2 ContentHandler events. */public class SAX2DTM extends DTMDefaultBaseIterators        implements EntityResolver, DTDHandler, ContentHandler, ErrorHandler,                   DeclHandler, LexicalHandler{  /** Set true to monitor SAX events and similar diagnostic info. */  private static final boolean DEBUG = false;  /**   * If we're building the model incrementally on demand, we need to   * be able to tell the source when to send us more data.   *   * Note that if this has not been set, and you attempt to read ahead   * of the current build point, we'll probably throw a null-pointer   * exception. We could try to wait-and-retry instead, as a very poor   * fallback, but that has all the known problems with multithreading   * on multiprocessors and we Don't Want to Go There.   *   * @see setIncrementalSAXSource   */  private IncrementalSAXSource m_incrementalSAXSource = null;  /**   * All the character content, including attribute values, are stored in   * this buffer.   *   * %REVIEW% Should this have an option of being shared across DTMs?   * Sequentially only; not threadsafe... Currently, I think not.   *   * %REVIEW% Initial size was pushed way down to reduce weight of RTFs.   * pending reduction in number of RTF DTMs. Now that we're sharing a DTM   * between RTFs, and tail-pruning... consider going back to the larger/faster.   *   * Made protected rather than private so SAX2RTFDTM can access it.   */  //private FastStringBuffer m_chars = new FastStringBuffer(13, 13);  protected FastStringBuffer m_chars;  /** This vector holds offset and length data.   */  protected SuballocatedIntVector m_data;  /** The parent stack, needed only for construction.   * Made protected rather than private so SAX2RTFDTM can access it.   */  transient protected IntStack m_parents;  /** The current previous node, needed only for construction time.   * Made protected rather than private so SAX2RTFDTM can access it.   */  transient protected int m_previous = 0;  /** Namespace support, only relevent at construction time.   * Made protected rather than private so SAX2RTFDTM can access it.   */  transient protected java.util.Vector m_prefixMappings =    new java.util.Vector();  /** Namespace support, only relevent at construction time.   * Made protected rather than private so SAX2RTFDTM can access it.   */  transient protected IntStack m_contextIndexes;  /** Type of next characters() event within text block in prgress. */  transient protected int m_textType = DTM.TEXT_NODE;  /**   * Type of coalesced text block. See logic in the characters()   * method.   */  transient protected int m_coalescedTextType = DTM.TEXT_NODE;  /** The SAX Document locator */  transient protected Locator m_locator = null;  /** The SAX Document system-id */  transient private String m_systemId = null;  /** We are inside the DTD.  This is used for ignoring comments.  */  transient protected boolean m_insideDTD = false;  /** Tree Walker for dispatchToEvents. */  protected DTMTreeWalker m_walker = new DTMTreeWalker();  /** pool of string values that come as strings. */  protected DTMStringPool m_valuesOrPrefixes;  /** End document has been reached.   * Made protected rather than private so SAX2RTFDTM can access it.   */  protected boolean m_endDocumentOccured = false;  /** Data or qualified name values, one array element for each node. */  protected SuballocatedIntVector m_dataOrQName;  /**   * This table holds the ID string to node associations, for   * XML IDs.   */  protected Hashtable m_idAttributes = new Hashtable();  /**   * fixed dom-style names.   */  private static final String[] m_fixednames = { null,                     null,  // nothing, Element                    null, "#text",  // Attr, Text                    "#cdata_section", null,  // CDATA, EntityReference                    null, null,  // Entity, PI                    "#comment", "#document",  // Comment, Document                    null, "#document-fragment",  // Doctype, DocumentFragment                    null };  // Notation  /**   * Vector of entities.  Each record is composed of four Strings:   *  publicId, systemID, notationName, and name.   */  private Vector m_entities = null;  /** m_entities public ID offset. */  private static final int ENTITY_FIELD_PUBLICID = 0;  /** m_entities system ID offset. */  private static final int ENTITY_FIELD_SYSTEMID = 1;  /** m_entities notation name offset. */  private static final int ENTITY_FIELD_NOTATIONNAME = 2;  /** m_entities name offset. */  private static final int ENTITY_FIELD_NAME = 3;  /** Number of entries per record for m_entities. */  private static final int ENTITY_FIELDS_PER = 4;  /**   * The starting offset within m_chars for the text or   * CDATA_SECTION node currently being acumulated,   * or -1 if there is no text node in progress   */  protected int m_textPendingStart = -1;  /**   * Describes whether information about document source location   * should be maintained or not.   *    * Made protected for access by SAX2RTFDTM.   */  protected boolean m_useSourceLocationProperty = false;   /** Made protected for access by SAX2RTFDTM.   */  protected StringVector m_sourceSystemId;   /** Made protected for access by SAX2RTFDTM.   */  protected IntVector m_sourceLine;   /** Made protected for access by SAX2RTFDTM.   */  protected IntVector m_sourceColumn;    /**   * Construct a SAX2DTM object using the default block size.   *   * @param mgr The DTMManager who owns this DTM.   * @param source the JAXP 1.1 Source object for this DTM.   * @param dtmIdentity The DTM identity ID for this DTM.   * @param whiteSpaceFilter The white space filter for this DTM, which may   *                         be null.   * @param xstringfactory XMLString factory for creating character content.   * @param doIndexing true if the caller considers it worth it to use   *                   indexing schemes.   */  public SAX2DTM(DTMManager mgr, Source source, int dtmIdentity,                 DTMWSFilter whiteSpaceFilter,                 XMLStringFactory xstringfactory,                 boolean doIndexing)  {    this(mgr, source, dtmIdentity, whiteSpaceFilter,          xstringfactory, doIndexing, DEFAULT_BLOCKSIZE, true, false);  }    /**   * Construct a SAX2DTM object ready to be constructed from SAX2   * ContentHandler events.   *   * @param mgr The DTMManager who owns this DTM.   * @param source the JAXP 1.1 Source object for this DTM.   * @param dtmIdentity The DTM identity ID for this DTM.   * @param whiteSpaceFilter The white space filter for this DTM, which may   *                         be null.   * @param xstringfactory XMLString factory for creating character content.   * @param doIndexing true if the caller considers it worth it to use   *                   indexing schemes.   * @param blocksize The block size of the DTM.   * @param usePrevsib true if we want to build the previous sibling node array.   * @param newNameTable true if we want to use a new ExpandedNameTable for this DTM.   */  public SAX2DTM(DTMManager mgr, Source source, int dtmIdentity,                 DTMWSFilter whiteSpaceFilter,                 XMLStringFactory xstringfactory,                 boolean doIndexing,                 int blocksize,                 boolean usePrevsib,                 boolean newNameTable)  {    super(mgr, source, dtmIdentity, whiteSpaceFilter,          xstringfactory, doIndexing, blocksize, usePrevsib, newNameTable);    // %OPT% Use smaller sizes for all internal storage units when    // the blocksize is small. This reduces the cost of creating an RTF.    if (blocksize <= 64)     {      m_data = new SuballocatedIntVector(blocksize, DEFAULT_NUMBLOCKS_SMALL);      m_dataOrQName = new SuballocatedIntVector(blocksize, DEFAULT_NUMBLOCKS_SMALL);      m_valuesOrPrefixes = new DTMStringPool(16);      m_chars = new FastStringBuffer(7, 10);      m_contextIndexes = new IntStack(4);      m_parents = new IntStack(4);    }    else    {      m_data = new SuballocatedIntVector(blocksize, DEFAULT_NUMBLOCKS);      m_dataOrQName = new SuballocatedIntVector(blocksize, DEFAULT_NUMBLOCKS);      m_valuesOrPrefixes = new DTMStringPool();      m_chars = new FastStringBuffer(10, 13);      m_contextIndexes = new IntStack();      m_parents = new IntStack();    }             // %REVIEW%  Initial size pushed way down to reduce weight of RTFs    // (I'm not entirely sure 0 would work, so I'm playing it safe for now.)    //m_data = new SuballocatedIntVector(doIndexing ? (1024*2) : 512, 1024);    //m_data = new SuballocatedIntVector(blocksize);    m_data.addElement(0);   // Need placeholder in case index into here must be <0.    //m_dataOrQName = new SuballocatedIntVector(blocksize);        // m_useSourceLocationProperty=com.sun.org.apache.xalan.internal.processor.TransformerFactoryImpl.m_source_location;    m_useSourceLocationProperty = mgr.getSource_location();    m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector() : null; 	m_sourceLine = (m_useSourceLocationProperty) ?  new IntVector() : null;    m_sourceColumn = (m_useSourceLocationProperty) ?  new IntVector() : null;   }  /**   * Set whether information about document source location   * should be maintained or not.    */  public void setUseSourceLocation(boolean useSourceLocation)  {    m_useSourceLocationProperty = useSourceLocation;  }  /**   * Get the data or qualified name for the given node identity.   *   * @param identity The node identity.   *   * @return The data or qualified name, or DTM.NULL.   */  protected int _dataOrQName(int identity)  {    if (identity < m_size)      return m_dataOrQName.elementAt(identity);    // Check to see if the information requested has been processed, and,    // if not, advance the iterator until we the information has been    // processed.    while (true)    {      boolean isMore = nextNode();      if (!isMore)        return NULL;      else if (identity < m_size)        return m_dataOrQName.elementAt(identity);    }  }  /**   * Ask the CoRoutine parser to doTerminate and clear the reference.   */  public void clearCoRoutine()  {    clearCoRoutine(true);  }  /**   * Ask the CoRoutine parser to doTerminate and clear the reference. If   * the CoRoutine parser has already been cleared, this will have no effect.   *   * @param callDoTerminate true of doTerminate should be called on the   * coRoutine parser.   */  public void clearCoRoutine(boolean callDoTerminate)  {    if (null != m_incrementalSAXSource)    {      if (callDoTerminate)        m_incrementalSAXSource.deliverMoreNodes(false);      m_incrementalSAXSource = null;    }  }  /**   * Bind a IncrementalSAXSource to this DTM. If we discover we need nodes   * that have not yet been built, we will ask this object to send us more   * events, and it will manage interactions with its data sources.   *   * Note that we do not actually build the IncrementalSAXSource, since we don't   * know what source it's reading from, what thread that source will run in,   * or when it will run.   *   * @param incrementalSAXSource The parser that we want to recieve events from   * on demand.   */  public void setIncrementalSAXSource(IncrementalSAXSource incrementalSAXSource)  {    // Establish coroutine link so we can request more data    //    // Note: It's possible that some versions of IncrementalSAXSource may    // not actually use a CoroutineManager, and hence may not require    // that we obtain an Application Coroutine ID. (This relies on the    // coroutine transaction details having been encapsulated in the    // IncrementalSAXSource.do...() methods.)    m_incrementalSAXSource = incrementalSAXSource;    // Establish SAX-stream link so we can receive the requested data    incrementalSAXSource.setContentHandler(this);    incrementalSAXSource.setLexicalHandler(this);    incrementalSAXSource.setDTDHandler(this);    // Are the following really needed? incrementalSAXSource doesn't yet    // support them, and they're mostly no-ops here...    //incrementalSAXSource.setErrorHandler(this);    //incrementalSAXSource.setDeclHandler(this);  }  /**   * getContentHandler returns "our SAX builder" -- the thing that   * someone else should send SAX events to in order to extend this   * DTM model.   *   * %REVIEW% Should this return null if constrution already done/begun?   *   * @return null if this model doesn't respond to SAX events,   * "this" if the DTM object has a built-in SAX ContentHandler,   * the IncrementalSAXSource if we're bound to one and should receive   * the SAX stream via it for incremental build purposes...   */  public ContentHandler getContentHandler()  {    if (m_incrementalSAXSource instanceof IncrementalSAXSource_Filter)      return (ContentHandler) m_incrementalSAXSource;    else      return this;  }  /**   * Return this DTM's lexical handler.   *   * %REVIEW% Should this return null if constrution already done/begun?   *   * @return null if this model doesn't respond to lexical SAX events,   * "this" if the DTM object has a built-in SAX ContentHandler,   * the IncrementalSAXSource if we're bound to one and should receive   * the SAX stream via it for incremental build purposes...   */  public LexicalHandler getLexicalHandler()  {    if (m_incrementalSAXSource instanceof IncrementalSAXSource_Filter)      return (LexicalHandler) m_incrementalSAXSource;    else      return this;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?