📄 tree2tree.java
字号:
// allocate the temporary data needed. we will reuse // the temporary data for each node. hopefully, this saves // us some time. for(int i=0; i<t2.nodes.size(); i++) { tmpData[i] = new TmpD(); } HashMap h12 = new HashMap(); long start, current; // special case for level 0 since we know how to deal with // it start = System.currentTimeMillis(); for(int i=0; i<t1.nodes.size(); i++) { TreeNode n = (TreeNode) t1.nodes.get(i); NodeScorePair p = computeBestMatch(n, t1, t2, tmpData); h12.put(n, p); } current = System.currentTimeMillis(); System.out.println("best match level 0 preprocessed: "+(current-start)/1000.0f+" sec"); start = current; v12.add(h12); for(int el=1; el<eL; el++) { h12 = new HashMap(); for(int i=0; i<t1.nodes.size(); i++) { TreeNode n = (TreeNode) t1.nodes.get(i); NodeScorePair p = computeBestMatch(n, t1, t2, el*1.0f/(eL-1), tmpData); h12.put(n, p); // System.out.println(n.leftmostLeaf.getName()+","+n.rightmostLeaf.getName()+":"+p.node.leftmostLeaf.getName()+","+p.node.rightmostLeaf.getName()+" "+p.score); } current = System.currentTimeMillis(); System.out.println("best match level "+el+" preprocessed: "+(current-start)/1000.0f+" sec"); start = current; v12.add(h12); } } /** * Computes the dissimilarity distance between treeA and treeB * * The distance is computed by summing up getBestCorrNode scores * between each tree node in treeA and the best matching node in * treeB. * * <Pre> * mode 0: alpha is the cut off * 1: (1-s)^\alpha * 2: 1-s^{1/\alpha} * </Pre> * */ float computeDistance(float alpha, int mode) { float dif1 = 0.0f; float s = 0.0f; for(int i=0; i<treeA.nodes.size(); i++) { try { s = getBestCorrNodeScore(treeA, (TreeNode) treeA.nodes.get(i), treeB, 0);} catch (Exception e) { } if(alpha==0) { if(s<1.0f) dif1+=1.0f; } else if (alpha==1) { dif1 += (1-s); } else { if(mode == 0) { s = 1.0f - s; // convert similarity to distance if(s>alpha) dif1+=1.0; else dif1 += s; } else if(mode == 1) { s = 1.0f - s; // convert similarity to distance dif1 += Math.pow(s,alpha); } else if (mode == 2) { dif1 += (1-Math.pow(s, 1.0/alpha)); } } } float dif2 = 0.0f; for(int i=0; i<treeB.nodes.size(); i++) { try { s = getBestCorrNodeScore(treeB, (TreeNode) treeB.nodes.get(i), treeA, 0);} catch (Exception e) { } if(alpha==0) { if(s<1.0f) dif2+=1.0f; } else if (alpha==1) { dif2 += (1-s); } else { if(mode == 0) { s = 1.0f - s; // convert similarity to distance if(s>alpha) dif2+=1.0; else dif2 += s; } else if(mode == 1) { s = 1.0f - s; // convert similarity to distance dif2 += Math.pow(s,alpha); } else if (mode == 2) { dif2 += (1-Math.pow(s, 1.0/alpha)); } } } return (dif1>dif2?dif1:dif2); } /** * compute the best corresponding node for each node * * node B is the best corresponding node of node A if it maximizes * | L(A) U L(B)| ---------------- | L(A) n L(B)| where L(A),L(B) represent the set of leaves underneath the node A and node B respectively. For the description of the algorithm, see Li Zhang. On Matching Nodes in Two Trees. **/ NodeScorePair computeBestMatch(TreeNode sourceNode, Tree sourceTree, Tree targetTree, TmpD[] tmpData) { LinkedList mleaves = new LinkedList(); Object[] la;// TreeNode n, nn; for(int currLeafIndex = sourceNode.leftmostLeaf.getLindex(); currLeafIndex <= sourceNode.rightmostLeaf.getLindex(); currLeafIndex++) { TreeNode n = sourceTree.getLeaf(currLeafIndex); TreeNode nn = targetTree.getNodeByName(n.getName()); if(nn!=null) { if(nn.isLeaf()) { tmpData[nn.key].tmpScore = 1; mleaves.add(nn); } } } if(mleaves.size()==0) { return new NodeScorePair(null, 0.0f); } else if(mleaves.size()==1) { return new NodeScorePair((TreeNode)(mleaves.get(0)), 1.0f/sourceNode.numberLeaves); } la = mleaves.toArray(); Arrays.sort(la, new LeafComparator()); int len = la.length; // build the simplified spanning tree by repeated bay traversal. // the following precedure can be probably best understood by TreeNode tmpPosorderStart = (TreeNode)(la[0]); TreeNode lastNode = (TreeNode)(la[len-1]); TreeNode tmpRoot = tmpPosorderStart.leafLca(lastNode); tmpData[tmpPosorderStart.key].tmpParent = tmpRoot; tmpData[lastNode.key].tmpParent = tmpRoot; tmpData[tmpPosorderStart.key].tmpPosorderNext = lastNode; tmpData[lastNode.key].tmpPosorderNext = tmpRoot; Stack bay = new Stack(); TreeNode prev, pprev,a; bay.push(tmpRoot); bay.push(tmpPosorderStart); for(int i=1; i<len-1; i++) { TreeNode nn = (TreeNode) (la[i]); if(nn.getLindex() == ((TreeNode)la[i-1]).getLindex()) continue; // no duplication, please a = nn.leafLca((TreeNode)(la[i-1])); prev = null; pprev = null; while(!bay.empty()) { pprev = prev; prev = (TreeNode) (bay.peek()); if(prev.isAncestorOf(a)) { if(prev==a) bay.pop(); break; } bay.pop(); } if(bay.empty()) { a = nn.leafLca(lastNode); if(a==prev) { // not a binary tree tmpData[nn.key].tmpParent = prev; tmpData[pprev.key].tmpPosorderNext = nn; // we need to deal with non-binary tree and when // the node inserted is a child of the root if(prev==tmpRoot) tmpData[nn.key].tmpPosorderNext = lastNode; else tmpData[nn.key].tmpPosorderNext = a; } else { // the node is inserted to the right branch // and creat the new tmpRoot tmpData[a.key].tmpParent = tmpData[lastNode.key].tmpParent; tmpData[lastNode.key].tmpParent = a; tmpData[nn.key].tmpParent = a; tmpData[a.key].tmpPosorderNext = tmpData[lastNode.key].tmpPosorderNext; tmpData[pprev.key].tmpPosorderNext = nn; tmpData[nn.key].tmpPosorderNext = lastNode; tmpData[lastNode.key].tmpPosorderNext = a; tmpRoot = a; } } else { if(a==prev) { tmpData[nn.key].tmpParent = prev; tmpData[pprev.key].tmpPosorderNext = nn; tmpData[nn.key].tmpPosorderNext = a; } else { tmpData[a.key].tmpParent = tmpData[pprev.key].tmpParent; tmpData[pprev.key].tmpParent = a; tmpData[nn.key].tmpParent = a; tmpData[a.key].tmpPosorderNext = tmpData[pprev.key].tmpPosorderNext; tmpData[pprev.key].tmpPosorderNext = nn; tmpData[nn.key].tmpPosorderNext = a; } } bay.push(a); bay.push(nn); } // walk up in the tree, compute the score of each node in the // spanning tree, and find the maximum score TreeNode match = null; float matchScore = 0.0f; float currentMatchScore; int sizeofUnion; for(TreeNode n = tmpPosorderStart; n!=null; n=tmpData[n.key].tmpPosorderNext) { sizeofUnion = sourceNode.numberLeaves+n.numberLeaves-tmpData[n.key].tmpScore; currentMatchScore = (float)(tmpData[n.key].tmpScore*1.0/sizeofUnion); if(matchScore<currentMatchScore) { match = n; matchScore = currentMatchScore; } if(matchScore == 1.0f) break; TreeNode np = tmpData[n.key].tmpParent; if(np != null) { tmpData[np.key].tmpScore += tmpData[n.key].tmpScore; } } // cleanup the temporary data TreeNode n = tmpPosorderStart; while(n!=null) { tmpData[n.key].tmpScore = 0; TreeNode nn = tmpData[n.key].tmpPosorderNext; tmpData[n.key].tmpPosorderNext = null; tmpData[n.key].tmpParent = null; n = nn; } return new NodeScorePair(match, matchScore); } // deal with edge weights NodeScorePair computeBestMatch(TreeNode sourceNode, Tree sourceTree, Tree targetTree, float alpha, TmpD[] tmpData) { // compute the path length to each leaf node HashMap h = new HashMap(); float tSum = 0; for (int currLeafIndex = sourceNode.leftmostLeaf.getLindex(); currLeafIndex <= sourceNode.rightmostLeaf.getLindex(); currLeafIndex++) { TreeNode currSourceLeaf = sourceTree.getLeaf(currLeafIndex); TreeNode p = currSourceLeaf; float pathLen = epsilon; while(p!=sourceNode) { pathLen += p.weight; p = p.parent; } float pathLenA = (float)Math.pow(pathLen, alpha); // float pathLenA = Math.exp(alpha*Math.log(pathLen)); h.put(currSourceLeaf.getName(), new Double(pathLenA)); tSum += pathLenA; } // prepare tmpData for(int i=0; i<tmpData.length; i++) { TmpD tmpd = tmpData[i]; tmpd.uSum = 0; // assume that the leaves are not presented in each node // and then correct the assumption later. tmpd.lSum = tSum; } // for each leaf node of t, accumulate the score bottom up // along the path to the root. for (int currTargetLeafIndex = 0; currTargetLeafIndex < targetTree.getLeafCount(); currTargetLeafIndex++) { TreeNode currTargetLeaf = targetTree.getLeaf(currTargetLeafIndex); float pathLen = epsilon; Double tpl = (Double)(h.get(currTargetLeaf.getName())); TreeNode p = currTargetLeaf; if(tpl == null) { // the leaf is not in the subtree rooted at an while(p!=null) { // tmpData[p.key].uSum += 0; tmpData[p.key].lSum += Math.pow(pathLen, alpha); // tmpData[p.key].lSum += Math.exp(alpha*Math.log(pathLen)); pathLen += p.weight; p = p.parent; } } else { float plen = tpl.floatValue(); while(p!=null) { TmpD tmpd = tmpData[p.key]; tmpd.lSum -= plen; float plenp = (float)Math.pow(pathLen, alpha); // float plenp = Math.exp(alpha*Math.log(pathLen)); if(plenp < plen) { tmpd.uSum += plenp; tmpd.lSum += plen; } else { tmpd.uSum += plen; tmpd.lSum += plenp; } pathLen += p.weight; p = p.parent; } } } // traverse the tree and compute the best match and clean up // tmpData TreeNode match = null; float matchScore = 0.0f; float currentMatchScore; for(int i=0; i<targetTree.nodes.size(); i++) { TreeNode n = (TreeNode)(targetTree.nodes.get(i)); TmpD tmpd = tmpData[n.key]; currentMatchScore = tmpd.uSum/tmpd.lSum; if(matchScore<currentMatchScore) { match = n; matchScore = currentMatchScore; } if(matchScore == 1.0f) break; } return new NodeScorePair(match, matchScore); }};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -