dom2dtm.java
来自「java1.6众多例子参考」· Java 代码 · 共 1,764 行 · 第 1/4 页
JAVA
1,764 行
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: DOM2DTM.java,v 1.2.4.1 2005/09/15 08:15:10 suresh_emailid Exp $ */package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;import java.util.Vector;import javax.xml.transform.SourceLocator;import javax.xml.transform.dom.DOMSource;import com.sun.org.apache.xml.internal.dtm.DTM;import com.sun.org.apache.xml.internal.dtm.DTMManager;import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;import com.sun.org.apache.xml.internal.res.XMLErrorResources;import com.sun.org.apache.xml.internal.res.XMLMessages;import com.sun.org.apache.xml.internal.utils.FastStringBuffer;import com.sun.org.apache.xml.internal.utils.QName;import com.sun.org.apache.xml.internal.utils.StringBufferPool;import com.sun.org.apache.xml.internal.utils.TreeWalker;import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;import com.sun.org.apache.xml.internal.utils.XMLString;import com.sun.org.apache.xml.internal.utils.XMLStringFactory;import org.w3c.dom.Attr;import org.w3c.dom.Document;import org.w3c.dom.DocumentType;import org.w3c.dom.Element;import org.w3c.dom.Entity;import org.w3c.dom.NamedNodeMap;import org.w3c.dom.Node;import 
org.xml.sax.ContentHandler;

/**
 * The <code>DOM2DTM</code> class serves up a DOM's contents via the
 * DTM API.
 *
 * Note that it doesn't necessarily represent a full Document
 * tree. You can wrap a DOM2DTM around a specific node and its subtree
 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
 * considering.)
 *
 * Note too that we do not currently attempt to track document
 * mutation. If you alter the DOM after wrapping DOM2DTM around it,
 * all bets are off.
 */
public class DOM2DTM extends DTMDefaultBaseIterators{
  /**
   * Debug switch. When true, nextNode() prints diagnostic messages to
   * System.out and deliberately spins forever if the DOM parent chain
   * runs out unexpectedly while popping.
   */
  static final boolean JJK_DEBUG=false;

  /**
   * Development switch.
   * NOTE(review): not referenced in the portion of the file reviewed
   * here -- confirm whether it is still used.
   */
  static final boolean JJK_NEWCODE=true;

  /**
   * Manifest constant.
   * NOTE(review): this value is the URI that the Namespaces in XML
   * specification binds to the "xml" prefix; the namespace of xmlns
   * declarations themselves is "http://www.w3.org/2000/xmlns/". The
   * name may therefore be misleading -- confirm against its usages.
   */
  static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";

  /**
   * The current position in the DOM tree. Last node examined for
   * possible copying to DTM.
   */
  transient private Node m_pos;

  /**
   * The current position in the DTM tree: the node that children get
   * appended to. The initializer below is overwritten with NULL by the
   * constructor.
   */
  private int m_last_parent=0;

  /**
   * The current position in the DTM tree: the node that new children
   * reference as their previous sibling.
   */
  private int m_last_kid=NULL;

  /**
   * The top of the subtree.
   * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
   */
  transient private Node m_root;

  /**
   * True iff the first element has been processed. This is used
   * to control synthesis of the implied xml: namespace declaration node.
   */
  boolean m_processedFirstElement=false;

  /**
   * true if ALL the nodes in the m_root subtree have been processed;
   * false if our incremental build has not yet finished scanning the
   * DOM tree.
   */
  transient private boolean m_nodesAreProcessed;

  /** The node objects. The instance part of the handle indexes
   * directly into this vector. Each DTM node may actually be
   * composed of several DOM nodes (for example, if logically-adjacent
   * Text/CDATASection nodes in the DOM have been coalesced into a
   * single DTM Text node); this table points only to the first in
   * that sequence.
*/ protected Vector m_nodes = new Vector(); /** * Construct a DOM2DTM object from a DOM node. * * @param mgr The DTMManager who owns this DTM. * @param domSource the DOM source that this DTM will wrap. * @param dtmIdentity The DTM identity ID for this DTM. * @param whiteSpaceFilter The white space filter for this DTM, which may * be null. * @param xstringfactory XMLString factory for creating character content. * @param doIndexing true if the caller considers it worth it to use * indexing schemes. */ public DOM2DTM(DTMManager mgr, DOMSource domSource, int dtmIdentity, DTMWSFilter whiteSpaceFilter, XMLStringFactory xstringfactory, boolean doIndexing) { super(mgr, domSource, dtmIdentity, whiteSpaceFilter, xstringfactory, doIndexing); // Initialize DOM navigation m_pos=m_root = domSource.getNode(); // Initialize DTM navigation m_last_parent=m_last_kid=NULL; m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL); // Apparently the domSource root may not actually be the // Document node. If it's an Element node, we need to immediately // add its attributes. Adapted from nextNode(). // %REVIEW% Move this logic into addNode and recurse? Cleaner! // // (If it's an EntityReference node, we're probably scrod. For now // I'm just hoping nobody is ever quite that foolish... %REVIEW%) // // %ISSUE% What about inherited namespaces in this case? // Do we need to special-case initialize them into the DTM model? if(ELEMENT_NODE == m_root.getNodeType()) { NamedNodeMap attrs=m_root.getAttributes(); int attrsize=(attrs==null) ? 0 : attrs.getLength(); if(attrsize>0) { int attrIndex=NULL; // start with no previous sib for(int i=0;i<attrsize;++i) { // No need to force nodetype in this case; // addNode() will take care of switching it from // Attr to Namespace if necessary. 
attrIndex=addNode(attrs.item(i),0,attrIndex,NULL); m_firstch.setElementAt(DTM.NULL,attrIndex); } // Terminate list of attrs, and make sure they aren't // considered children of the element m_nextsib.setElementAt(DTM.NULL,attrIndex); // IMPORTANT: This does NOT change m_last_parent or m_last_kid! } // if attrs exist } //if(ELEMENT_NODE) // Initialize DTM-completed status m_nodesAreProcessed = false; } /** * Construct the node map from the node. * * @param node The node that is to be added to the DTM. * @param parentIndex The current parent index. * @param previousSibling The previous sibling index. * @param forceNodeType If not DTM.NULL, overrides the DOM node type. * Used to force nodes to Text rather than CDATASection when their * coalesced value includes ordinary Text nodes (current DTM behavior). * * @return The index identity of the node that was added. */ protected int addNode(Node node, int parentIndex, int previousSibling, int forceNodeType) { int nodeIndex = m_nodes.size(); // Have we overflowed a DTM Identity's addressing range? if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS)) { try { if(m_mgr==null) throw new ClassCastException(); // Handle as Extended Addressing DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr; int id=mgrD.getFirstFreeDTMID(); mgrD.addDTM(this,id,nodeIndex); m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS); } catch(ClassCastException e) { // %REVIEW% Wrong error message, but I've been told we're trying // not to add messages right not for I18N reasons. // %REVIEW% Should this be a Fatal Error? 
error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available"; } } m_size++; // ensureSize(nodeIndex); int type; if(NULL==forceNodeType) type = node.getNodeType(); else type=forceNodeType; // %REVIEW% The Namespace Spec currently says that Namespaces are // processed in a non-namespace-aware manner, by matching the // QName, even though there is in fact a namespace assigned to // these nodes in the DOM. If and when that changes, we will have // to consider whether we check the namespace-for-namespaces // rather than the node name. // // %TBD% Note that the DOM does not necessarily explicitly declare // all the namespaces it uses. DOM Level 3 will introduce a // namespace-normalization operation which reconciles that, and we // can request that users invoke it or otherwise ensure that the // tree is namespace-well-formed before passing the DOM to Xalan. // But if they don't, what should we do about it? We probably // don't want to alter the source DOM (and may not be able to do // so if it's read-only). The best available answer might be to // synthesize additional DTM Namespace Nodes that don't correspond // to DOM Attr Nodes. if (Node.ATTRIBUTE_NODE == type) { String name = node.getNodeName(); if (name.startsWith("xmlns:") || name.equals("xmlns")) { type = DTM.NAMESPACE_NODE; } } m_nodes.addElement(node); m_firstch.setElementAt(NOTPROCESSED,nodeIndex); m_nextsib.setElementAt(NOTPROCESSED,nodeIndex); m_prevsib.setElementAt(previousSibling,nodeIndex); m_parent.setElementAt(parentIndex,nodeIndex); if(DTM.NULL != parentIndex && type != DTM.ATTRIBUTE_NODE && type != DTM.NAMESPACE_NODE) { // If the DTM parent had no children, this becomes its first child. if(NOTPROCESSED == m_firstch.elementAt(parentIndex)) m_firstch.setElementAt(nodeIndex,parentIndex); } String nsURI = node.getNamespaceURI(); // Deal with the difference between Namespace spec and XSLT // definitions of local name. 
(The former says PIs don't have // localnames; the latter says they do.) String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ? node.getNodeName() : node.getLocalName(); // Hack to make DOM1 sort of work... if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE)) && null == localName) localName = node.getNodeName(); // -sb ExpandedNameTable exnt = m_expandedNameTable; // %TBD% Nodes created with the old non-namespace-aware DOM // calls createElement() and createAttribute() will never have a // localname. That will cause their expandedNameID to be just the // nodeType... which will keep them from being matched // successfully by name. Since the DOM makes no promise that // those will participate in namespace processing, this is // officially accepted as Not Our Fault. But it might be nice to // issue a diagnostic message! if(node.getLocalName()==null && (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE)) { // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM."); } int expandedNameID = (null != localName) ? exnt.getExpandedTypeID(nsURI, localName, type) : exnt.getExpandedTypeID(type); m_exptype.setElementAt(expandedNameID,nodeIndex); indexNode(expandedNameID, nodeIndex); if (DTM.NULL != previousSibling) m_nextsib.setElementAt(nodeIndex,previousSibling); // This should be done after m_exptype has been set, and probably should // always be the last thing we do if (type == DTM.NAMESPACE_NODE) declareNamespaceInContext(parentIndex,nodeIndex); return nodeIndex; } /** * Get the number of nodes that have been added. */ public int getNumberOfNodes() { return m_nodes.size(); } /** * This method iterates to the next node that will be added to the table. * Each call to this method adds a new node to the table, unless the end * is reached, in which case it returns null. * * @return The true if a next node is found or false if * there are no more nodes. 
*/ protected boolean nextNode() { // Non-recursive one-fetch-at-a-time depth-first traversal with // attribute/namespace nodes and white-space stripping. // Navigating the DOM is simple, navigating the DTM is simple; // keeping track of both at once is a trifle baroque but at least // we've avoided most of the special cases. if (m_nodesAreProcessed) return false; // %REVIEW% Is this local copy Really Useful from a performance // point of view? Or is this a false microoptimization? Node pos=m_pos; Node next=null; int nexttype=NULL; // Navigate DOM tree do { // Look down to first child. if (pos.hasChildNodes()) { next = pos.getFirstChild(); // %REVIEW% There's probably a more elegant way to skip // the doctype. (Just let it go and Suppress it? if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) next=next.getNextSibling(); // Push DTM context -- except for children of Entity References, // which have no DTM equivalent and cause no DTM navigation. if(ENTITY_REFERENCE_NODE!=pos.getNodeType()) { m_last_parent=m_last_kid; m_last_kid=NULL; // Whitespace-handler context stacking if(null != m_wsfilter) { short wsv = m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this); boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) ? getShouldStripWhitespace() : (DTMWSFilter.STRIP == wsv); pushShouldStripWhitespace(shouldStrip); } // if(m_wsfilter) } } // If that fails, look up and right (but not past root!) else { if(m_last_kid!=NULL) { // Last node posted at this level had no more children // If it has _no_ children, we need to record that. if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED) m_firstch.setElementAt(NULL,m_last_kid); } while(m_last_parent != NULL) { // %REVIEW% There's probably a more elegant way to // skip the doctype. (Just let it go and Suppress it? next = pos.getNextSibling(); if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) next=next.getNextSibling(); if(next!=null) break; // Found it! // No next-sibling found. Pop the DOM. 
pos=pos.getParentNode(); if(pos==null) { // %TBD% Should never arise, but I want to be sure of that... if(JJK_DEBUG) { System.out.println("***** DOM2DTM Pop Control Flow problem"); for(;;); // Freeze right here! } } // The only parents in the DTM are Elements. However, // the DOM could contain EntityReferences. If we // encounter one, pop it _without_ popping DTM. if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType()) { // Nothing needs doing if(JJK_DEBUG) System.out.println("***** DOM2DTM popping EntRef"); } else { popShouldStripWhitespace(); // Fix and pop DTM if(m_last_kid==NULL) m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element else m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent); } } if(m_last_parent==NULL) next=null; } if(next!=null) nexttype=next.getNodeType(); // If it's an entity ref, advance past it. // // %REVIEW% Should we let this out the door and just suppress it? // More work, but simpler code, more likely to be correct, and // it doesn't happen very often. We'd get rid of the loop too. if (ENTITY_REFERENCE_NODE == nexttype) pos=next; } while (ENTITY_REFERENCE_NODE == nexttype);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?