dom2dtm.java

来自「Mobile 应用程序使用 Java Micro Edition (Java M」· Java 代码 · 共 1,764 行 · 第 1/4 页

JAVA
1,764
字号
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: DOM2DTM.java,v 1.2.4.1 2005/09/15 08:15:10 suresh_emailid Exp $ */package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;import java.util.Vector;import javax.xml.transform.SourceLocator;import javax.xml.transform.dom.DOMSource;import com.sun.org.apache.xml.internal.dtm.DTM;import com.sun.org.apache.xml.internal.dtm.DTMManager;import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;import com.sun.org.apache.xml.internal.res.XMLErrorResources;import com.sun.org.apache.xml.internal.res.XMLMessages;import com.sun.org.apache.xml.internal.utils.FastStringBuffer;import com.sun.org.apache.xml.internal.utils.QName;import com.sun.org.apache.xml.internal.utils.StringBufferPool;import com.sun.org.apache.xml.internal.utils.TreeWalker;import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;import com.sun.org.apache.xml.internal.utils.XMLString;import com.sun.org.apache.xml.internal.utils.XMLStringFactory;import org.w3c.dom.Attr;import org.w3c.dom.Document;import org.w3c.dom.DocumentType;import org.w3c.dom.Element;import org.w3c.dom.Entity;import org.w3c.dom.NamedNodeMap;import org.w3c.dom.Node;import org.xml.sax.ContentHandler;/** The <code>DOM2DTM</code> class serves up a DOM's contents via the * DTM API. * * Note that it doesn't necessarily represent a full Document * tree. You can wrap a DOM2DTM around a specific node and its subtree * and the right things should happen. (I don't _think_ we currently * support DocumentFrgment nodes as roots, though that might be worth * considering.) * * Note too that we do not currently attempt to track document * mutation. If you alter the DOM after wrapping DOM2DTM around it, * all bets are off. * */public class DOM2DTM extends DTMDefaultBaseIterators{  static final boolean JJK_DEBUG=false;  static final boolean JJK_NEWCODE=true;    /** Manefest constant   */  static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";    /** The current position in the DOM tree. Last node examined for   * possible copying to DTM. */  transient private Node m_pos;  /** The current position in the DTM tree. Who children get appended to. */  private int m_last_parent=0;  /** The current position in the DTM tree. Who children reference as their    * previous sib. */  private int m_last_kid=NULL;  /** The top of the subtree.   * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'   * */  transient private Node m_root;  /** True iff the first element has been processed. This is used to control      synthesis of the implied xml: namespace declaration node. */  boolean m_processedFirstElement=false;          /** true if ALL the nodes in the m_root subtree have been processed;   * false if our incremental build has not yet finished scanning the   * DOM tree.  */  transient private boolean m_nodesAreProcessed;  /** The node objects.  The instance part of the handle indexes   * directly into this vector.  Each DTM node may actually be   * composed of several DOM nodes (for example, if logically-adjacent   * Text/CDATASection nodes in the DOM have been coalesced into a   * single DTM Text node); this table points only to the first in   * that sequence. */  protected Vector m_nodes = new Vector();  /**   * Construct a DOM2DTM object from a DOM node.   *   * @param mgr The DTMManager who owns this DTM.   * @param domSource the DOM source that this DTM will wrap.   * @param dtmIdentity The DTM identity ID for this DTM.   * @param whiteSpaceFilter The white space filter for this DTM, which may    *                         be null.   * @param xstringfactory XMLString factory for creating character content.   * @param doIndexing true if the caller considers it worth it to use    *                   indexing schemes.   */  public DOM2DTM(DTMManager mgr, DOMSource domSource,                  int dtmIdentity, DTMWSFilter whiteSpaceFilter,                 XMLStringFactory xstringfactory,                 boolean doIndexing)  {    super(mgr, domSource, dtmIdentity, whiteSpaceFilter,           xstringfactory, doIndexing);    // Initialize DOM navigation    m_pos=m_root = domSource.getNode();    // Initialize DTM navigation    m_last_parent=m_last_kid=NULL;    m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);    // Apparently the domSource root may not actually be the    // Document node. If it's an Element node, we need to immediately    // add its attributes. Adapted from nextNode().    // %REVIEW% Move this logic into addNode and recurse? Cleaner!    //    // (If it's an EntityReference node, we're probably scrod. For now    // I'm just hoping nobody is ever quite that foolish... %REVIEW%)		//		// %ISSUE% What about inherited namespaces in this case?		// Do we need to special-case initialize them into the DTM model?    if(ELEMENT_NODE == m_root.getNodeType())    {      NamedNodeMap attrs=m_root.getAttributes();      int attrsize=(attrs==null) ? 0 : attrs.getLength();      if(attrsize>0)      {        int attrIndex=NULL; // start with no previous sib        for(int i=0;i<attrsize;++i)        {          // No need to force nodetype in this case;          // addNode() will take care of switching it from          // Attr to Namespace if necessary.          attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);          m_firstch.setElementAt(DTM.NULL,attrIndex);        }        // Terminate list of attrs, and make sure they aren't        // considered children of the element        m_nextsib.setElementAt(DTM.NULL,attrIndex);        // IMPORTANT: This does NOT change m_last_parent or m_last_kid!      } // if attrs exist    } //if(ELEMENT_NODE)    // Initialize DTM-completed status     m_nodesAreProcessed = false;  }  /**   * Construct the node map from the node.   *   * @param node The node that is to be added to the DTM.   * @param parentIndex The current parent index.   * @param previousSibling The previous sibling index.   * @param forceNodeType If not DTM.NULL, overrides the DOM node type.   *	Used to force nodes to Text rather than CDATASection when their   *	coalesced value includes ordinary Text nodes (current DTM behavior).   *   * @return The index identity of the node that was added.   */  protected int addNode(Node node, int parentIndex,                        int previousSibling, int forceNodeType)  {    int nodeIndex = m_nodes.size();    // Have we overflowed a DTM Identity's addressing range?    if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))    {      try      {        if(m_mgr==null)          throw new ClassCastException();                                                                // Handle as Extended Addressing        DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;        int id=mgrD.getFirstFreeDTMID();        mgrD.addDTM(this,id,nodeIndex);        m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);      }      catch(ClassCastException e)      {        // %REVIEW% Wrong error message, but I've been told we're trying        // not to add messages right not for I18N reasons.        // %REVIEW% Should this be a Fatal Error?        error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";      }    }    m_size++;    // ensureSize(nodeIndex);        int type;    if(NULL==forceNodeType)        type = node.getNodeType();    else        type=forceNodeType;            // %REVIEW% The Namespace Spec currently says that Namespaces are    // processed in a non-namespace-aware manner, by matching the    // QName, even though there is in fact a namespace assigned to    // these nodes in the DOM. If and when that changes, we will have    // to consider whether we check the namespace-for-namespaces    // rather than the node name.    //    // %TBD% Note that the DOM does not necessarily explicitly declare    // all the namespaces it uses. DOM Level 3 will introduce a    // namespace-normalization operation which reconciles that, and we    // can request that users invoke it or otherwise ensure that the    // tree is namespace-well-formed before passing the DOM to Xalan.    // But if they don't, what should we do about it? We probably    // don't want to alter the source DOM (and may not be able to do    // so if it's read-only). The best available answer might be to    // synthesize additional DTM Namespace Nodes that don't correspond    // to DOM Attr Nodes.    if (Node.ATTRIBUTE_NODE == type)    {      String name = node.getNodeName();      if (name.startsWith("xmlns:") || name.equals("xmlns"))      {        type = DTM.NAMESPACE_NODE;      }    }        m_nodes.addElement(node);        m_firstch.setElementAt(NOTPROCESSED,nodeIndex);    m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);    m_prevsib.setElementAt(previousSibling,nodeIndex);    m_parent.setElementAt(parentIndex,nodeIndex);        if(DTM.NULL != parentIndex &&        type != DTM.ATTRIBUTE_NODE &&        type != DTM.NAMESPACE_NODE)    {      // If the DTM parent had no children, this becomes its first child.      if(NOTPROCESSED == m_firstch.elementAt(parentIndex))        m_firstch.setElementAt(nodeIndex,parentIndex);    }        String nsURI = node.getNamespaceURI();    // Deal with the difference between Namespace spec and XSLT    // definitions of local name. (The former says PIs don't have    // localnames; the latter says they do.)    String localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ?                          node.getNodeName() :                         node.getLocalName();                             // Hack to make DOM1 sort of work...    if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))         && null == localName)      localName = node.getNodeName(); // -sb          ExpandedNameTable exnt = m_expandedNameTable;    // %TBD% Nodes created with the old non-namespace-aware DOM    // calls createElement() and createAttribute() will never have a    // localname. That will cause their expandedNameID to be just the    // nodeType... which will keep them from being matched    // successfully by name. Since the DOM makes no promise that    // those will participate in namespace processing, this is    // officially accepted as Not Our Fault. But it might be nice to    // issue a diagnostic message!    if(node.getLocalName()==null &&       (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))      {        // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");      }        int expandedNameID = (null != localName)        ? exnt.getExpandedTypeID(nsURI, localName, type) :         exnt.getExpandedTypeID(type);    m_exptype.setElementAt(expandedNameID,nodeIndex);        indexNode(expandedNameID, nodeIndex);    if (DTM.NULL != previousSibling)      m_nextsib.setElementAt(nodeIndex,previousSibling);    // This should be done after m_exptype has been set, and probably should    // always be the last thing we do    if (type == DTM.NAMESPACE_NODE)        declareNamespaceInContext(parentIndex,nodeIndex);    return nodeIndex;  }    /**   * Get the number of nodes that have been added.   */  public int getNumberOfNodes()  {    return m_nodes.size();  }   /**   * This method iterates to the next node that will be added to the table.   * Each call to this method adds a new node to the table, unless the end   * is reached, in which case it returns null.   *   * @return The true if a next node is found or false if    *         there are no more nodes.   */  protected boolean nextNode()  {    // Non-recursive one-fetch-at-a-time depth-first traversal with     // attribute/namespace nodes and white-space stripping.    // Navigating the DOM is simple, navigating the DTM is simple;    // keeping track of both at once is a trifle baroque but at least    // we've avoided most of the special cases.    if (m_nodesAreProcessed)      return false;            // %REVIEW% Is this local copy Really Useful from a performance    // point of view?  Or is this a false microoptimization?    Node pos=m_pos;     Node next=null;    int nexttype=NULL;    // Navigate DOM tree    do      {        // Look down to first child.        if (pos.hasChildNodes())           {            next = pos.getFirstChild();            // %REVIEW% There's probably a more elegant way to skip            // the doctype. (Just let it go and Suppress it?            if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())              next=next.getNextSibling();            // Push DTM context -- except for children of Entity References,             // which have no DTM equivalent and cause no DTM navigation.            if(ENTITY_REFERENCE_NODE!=pos.getNodeType())              {                m_last_parent=m_last_kid;                m_last_kid=NULL;                // Whitespace-handler context stacking                if(null != m_wsfilter)                {                  short wsv =                    m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);                  boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)                     ? getShouldStripWhitespace()                     : (DTMWSFilter.STRIP == wsv);                  pushShouldStripWhitespace(shouldStrip);                } // if(m_wsfilter)              }          }        // If that fails, look up and right (but not past root!)        else           {            if(m_last_kid!=NULL)              {                // Last node posted at this level had no more children                // If it has _no_ children, we need to record that.                if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)                  m_firstch.setElementAt(NULL,m_last_kid);              }                                    while(m_last_parent != NULL)              {                // %REVIEW% There's probably a more elegant way to                // skip the doctype. (Just let it go and Suppress it?                next = pos.getNextSibling();                if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())                  next=next.getNextSibling();                if(next!=null)                  break; // Found it!                                // No next-sibling found. Pop the DOM.                pos=pos.getParentNode();                if(pos==null)                  {                    // %TBD% Should never arise, but I want to be sure of that...                    if(JJK_DEBUG)                      {                        System.out.println("***** DOM2DTM Pop Control Flow problem");                        for(;;); // Freeze right here!                      }                  }                                // The only parents in the DTM are Elements.  However,                // the DOM could contain EntityReferences.  If we                // encounter one, pop it _without_ popping DTM.                if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())                  {                    // Nothing needs doing                    if(JJK_DEBUG)                      System.out.println("***** DOM2DTM popping EntRef");                  }                else                  {                    popShouldStripWhitespace();                    // Fix and pop DTM                    if(m_last_kid==NULL)                      m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element                    else                      m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else                    m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);                  }              }            if(m_last_parent==NULL)              next=null;          }                        if(next!=null)          nexttype=next.getNodeType();                        // If it's an entity ref, advance past it.        //        // %REVIEW% Should we let this out the door and just suppress it?        // More work, but simpler code, more likely to be correct, and        // it doesn't happen very often. We'd get rid of the loop too.        if (ENTITY_REFERENCE_NODE == nexttype)          pos=next;      }    while (ENTITY_REFERENCE_NODE == nexttype); 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?