📄 dfacontentmodel.java
字号:
if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) { //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]); if (fElemMap[elemIndex].rawname == curElem.rawname) { break; } } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) { String uri = fElemMap[elemIndex].uri; if (uri == null || uri == curElem.uri) { break; } } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) { if (curElem.uri == null) { break; } } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) { if (fElemMap[elemIndex].uri != curElem.uri) { break; } } } // If we didn't find it, then obviously not valid if (elemIndex == fElemMapSize) { if (DEBUG_VALIDATE_CONTENT) { System.out.println("!!! didn't find it"); System.out.println("curElem : " +curElem ); for (int i=0; i<fElemMapSize; i++) { System.out.println("fElemMap["+i+"] = " +fElemMap[i] ); System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] ); } } return childIndex; } // // Look up the next state for this input symbol when in the // current state. // curState = fTransTable[curState][elemIndex]; // If its not a legal transition, then invalid if (curState == -1) { if (DEBUG_VALIDATE_CONTENT) System.out.println("!!! not a legal transition"); return childIndex; } } // // We transitioned all the way through the input list. However, that // does not mean that we ended in a final state. So check whether // our ending state is a final state. // if (DEBUG_VALIDATE_CONTENT) System.out.println("curState="+curState+", childCount="+length); if (!fFinalStateFlags[curState]) return length; // success! return -1; } // validate // // Private methods // /** * Builds the internal DFA transition table from the given syntax tree. * * @param syntaxTree The syntax tree. * * @exception CMException Thrown if DFA cannot be built. */ private void buildDFA(CMNode syntaxTree) { // // The first step we need to take is to rewrite the content model // using our CMNode objects, and in the process get rid of any // repetition short cuts, converting them into '*' style repetitions // or getting rid of repetitions altogether. // // The conversions done are: // // x+ -> (x|x*) // x? -> (x|epsilon) // // This is a relatively complex scenario. What is happening is that // we create a top level binary node of which the special EOC value // is set as the right side node. The the left side is set to the // rewritten syntax tree. The source is the original content model // info from the decl pool. The rewrite is done by buildSyntaxTree() // which recurses the decl pool's content of the element and builds // a new tree in the process. // // Note that, during this operation, we set each non-epsilon leaf // node's DFA state position and count the number of such leafs, which // is left in the fLeafCount member. // // The nodeTmp object is passed in just as a temp node to use during // the recursion. Otherwise, we'd have to create a new node on every // level of recursion, which would be piggy in Java (as is everything // for that matter.) // /* MODIFIED (Jan, 2001) * * Use following rules. * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) * * The same computation of follow as x* is applied to x+ * * The modification drastically reduces computation time of * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" */ fQName.setValues(null, fEOCString, fEOCString, null); CMLeaf nodeEOC = new CMLeaf(fQName); fHeadNode = new CMBinOp ( XMLContentSpec.CONTENTSPECNODE_SEQ , syntaxTree , nodeEOC ); // // And handle specially the EOC node, which also must be numbered // and counted as a non-epsilon leaf node. It could not be handled // in the above tree build because it was created before all that // started. We save the EOC position since its used during the DFA // building loop. // fEOCPos = fLeafCount; nodeEOC.setPosition(fLeafCount++); // // Ok, so now we have to iterate the new tree and do a little more // work now that we know the leaf count. One thing we need to do is // to calculate the first and last position sets of each node. This // is cached away in each of the nodes. // // Along the way we also set the leaf count in each node as the // maximum state count. They must know this in order to create their // first/last pos sets. // // We also need to build an array of references to the non-epsilon // leaf nodes. Since we iterate it in the same way as before, this // will put them in the array according to their position values. // fLeafList = new CMLeaf[fLeafCount]; fLeafListType = new int[fLeafCount]; postTreeBuildInit(fHeadNode, 0); // // And, moving onward... We now need to build the follow position // sets for all the nodes. So we allocate an array of state sets, // one for each leaf node (i.e. each DFA position.) // fFollowList = new CMStateSet[fLeafCount]; for (int index = 0; index < fLeafCount; index++) fFollowList[index] = new CMStateSet(fLeafCount); calcFollowList(fHeadNode); // // And finally the big push... Now we build the DFA using all the // states and the tree we've built up. First we set up the various // data structures we are going to use while we do this. // // First of all we need an array of unique element names in our // content model. For each transition table entry, we need a set of // contiguous indices to represent the transitions for a particular // input element. So we need to a zero based range of indexes that // map to element types. This element map provides that mapping. // fElemMap = new QName[fLeafCount]; fElemMapType = new int[fLeafCount]; fElemMapSize = 0; for (int outIndex = 0; outIndex < fLeafCount; outIndex++) { fElemMap[outIndex] = new QName(); /**** if ( (fLeafListType[outIndex] & 0x0f) != 0 ) { if (fLeafNameTypeVector == null) { fLeafNameTypeVector = new ContentLeafNameTypeVector(); } } /****/ // Get the current leaf's element index final QName element = fLeafList[outIndex].getElement(); // See if the current leaf node's element index is in the list int inIndex = 0; for (; inIndex < fElemMapSize; inIndex++) { if (fElemMap[inIndex].rawname == element.rawname) { break; } } // If it was not in the list, then add it, if not the EOC node if (inIndex == fElemMapSize) { fElemMap[fElemMapSize].setValues(element); fElemMapType[fElemMapSize] = fLeafListType[outIndex]; fElemMapSize++; } } // set up the fLeafNameTypeVector object if there is one. /***** if (fLeafNameTypeVector != null) { fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize); } /*** * Optimization(Jan, 2001); We sort fLeafList according to * elemIndex which is *uniquely* associated to each leaf. * We are *assuming* that each element appears in at least one leaf. **/ int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; int fSortCount = 0; for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { final QName leaf = fLeafList[leafIndex].getElement(); final QName element = fElemMap[elemIndex]; if (leaf.rawname == element.rawname) { fLeafSorter[fSortCount++] = leafIndex; } } fLeafSorter[fSortCount++] = -1; } /* Optimization(Jan, 2001) */ // // Next lets create some arrays, some that that hold transient // information during the DFA build and some that are permament. // These are kind of sticky since we cannot know how big they will // get, but we don't want to use any Java collections because of // performance. // // Basically they will probably be about fLeafCount*2 on average, // but can be as large as 2^(fLeafCount*2), worst case. So we start // with fLeafCount*4 as a middle ground. This will be very unlikely // to ever have to expand, though it if does, the overhead will be // somewhat ugly. // int curArraySize = fLeafCount * 4; CMStateSet[] statesToDo = new CMStateSet[curArraySize]; fFinalStateFlags = new boolean[curArraySize]; fTransTable = new int[curArraySize][]; // // Ok we start with the initial set as the first pos set of the // head node (which is the seq node that holds the content model // and the EOC node.) // CMStateSet setT = fHeadNode.firstPos(); // // Init our two state flags. Basically the unmarked state counter // is always chasing the current state counter. When it catches up, // that means we made a pass through that did not add any new states // to the lists, at which time we are done. We could have used a // expanding array of flags which we used to mark off states as we // complete them, but this is easier though less readable maybe. // int unmarkedState = 0; int curState = 0; // // Init the first transition table entry, and put the initial state // into the states to do list, then bump the current state. // fTransTable[curState] = makeDefStateList(); statesToDo[curState] = setT; curState++; /* Optimization(Jan, 2001); This is faster for * a large content model such as, "(t001+|t002+|.... |t500+)". */ java.util.Hashtable stateTable = new java.util.Hashtable(); /* Optimization(Jan, 2001) */ // // Ok, almost done with the algorithm... We now enter the // loop where we go until the states done counter catches up with // the states to do counter. // while (unmarkedState < curState) { // // Get the first unmarked state out of the list of states to do. // And get the associated transition table entry. // setT = statesToDo[unmarkedState]; int[] transEntry = fTransTable[unmarkedState]; // Mark this one final if it contains the EOC state fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos); // Bump up the unmarked state count, marking this state done unmarkedState++; // Loop through each possible input symbol in the element map CMStateSet newSet = null; /* Optimization(Jan, 2001) */ int sorterIndex = 0; /* Optimization(Jan, 2001) */ for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { // // Build up a set of states which is the union of all of // the follow sets of DFA positions that are in the current // state. If we gave away the new set last time through then // create a new one. Otherwise, zero out the existing one. // if (newSet == null) newSet = new CMStateSet(fLeafCount); else newSet.zeroBits(); /* Optimization(Jan, 2001) */ int leafIndex = fLeafSorter[sorterIndex++]; while (leafIndex != -1) { // If this leaf index (DFA position) is in the current set... if (setT.getBit(leafIndex)) { // // If this leaf is the current input symbol, then we // want to add its follow list to the set of states to // transition to from the current state. // newSet.union(fFollowList[leafIndex]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -