nlvisitor.java

来自「Semantic Web Ontology Editor」· Java 代码 · 共 1,843 行 · 第 1/4 页

JAVA
1,843
字号
//The MIT License//// Copyright (c) 2004 Mindswap Research Group, University of Maryland, College Park//// Permission is hereby granted, free of charge, to any person obtaining a copy// of this software and associated documentation files (the "Software"), to// deal in the Software without restriction, including without limitation the// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or// sell copies of the Software, and to permit persons to whom the Software is// furnished to do so, subject to the following conditions://// The above copyright notice and this permission notice shall be included in// all copies or substantial portions of the Software.//// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS// IN THE SOFTWARE.package org.mindswap.swoop.renderer.entity;import java.io.IOException;import java.io.PrintWriter;import java.io.StringWriter;import java.net.URI;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Hashtable;import java.util.Iterator;import java.util.List;import java.util.Set;import java.util.Vector;import org.apache.commons.lang.StringEscapeUtils;import org.mindswap.swoop.SwoopModel;import org.mindswap.swoop.renderer.BaseEntityRenderer;import org.mindswap.swoop.renderer.SwoopRenderingVisitor;import org.semanticweb.owl.io.ShortFormProvider;import org.semanticweb.owl.model.OWLAnd;import org.semanticweb.owl.model.OWLClass;import org.semanticweb.owl.model.OWLDataAllRestriction;import org.semanticweb.owl.model.OWLDataCardinalityRestriction;import org.semanticweb.owl.model.OWLDataEnumeration;import org.semanticweb.owl.model.OWLDataProperty;import org.semanticweb.owl.model.OWLDataPropertyInstance;import org.semanticweb.owl.model.OWLDataPropertyRangeAxiom;import org.semanticweb.owl.model.OWLDataSomeRestriction;import org.semanticweb.owl.model.OWLDataType;import org.semanticweb.owl.model.OWLDataValue;import org.semanticweb.owl.model.OWLDataValueRestriction;import org.semanticweb.owl.model.OWLDescription;import org.semanticweb.owl.model.OWLDifferentIndividualsAxiom;import org.semanticweb.owl.model.OWLDisjointClassesAxiom;import org.semanticweb.owl.model.OWLEnumeration;import org.semanticweb.owl.model.OWLEquivalentClassesAxiom;import org.semanticweb.owl.model.OWLEquivalentPropertiesAxiom;import org.semanticweb.owl.model.OWLException;import org.semanticweb.owl.model.OWLFunctionalPropertyAxiom;import org.semanticweb.owl.model.OWLIndividual;import org.semanticweb.owl.model.OWLIndividualTypeAssertion;import org.semanticweb.owl.model.OWLInverseFunctionalPropertyAxiom;import org.semanticweb.owl.model.OWLInversePropertyAxiom;import org.semanticweb.owl.model.OWLNot;import org.semanticweb.owl.model.OWLObjectAllRestriction;import org.semanticweb.owl.model.OWLObjectCardinalityRestriction;import org.semanticweb.owl.model.OWLObjectProperty;import org.semanticweb.owl.model.OWLObjectPropertyInstance;import org.semanticweb.owl.model.OWLObjectPropertyRangeAxiom;import org.semanticweb.owl.model.OWLObjectSomeRestriction;import org.semanticweb.owl.model.OWLObjectValueRestriction;import org.semanticweb.owl.model.OWLOr;import org.semanticweb.owl.model.OWLProperty;import org.semanticweb.owl.model.OWLPropertyDomainAxiom;import org.semanticweb.owl.model.OWLSameIndividualsAxiom;import org.semanticweb.owl.model.OWLSubClassAxiom;import org.semanticweb.owl.model.OWLSubPropertyAxiom;import org.semanticweb.owl.model.OWLSymmetricPropertyAxiom;import org.semanticweb.owl.model.OWLTransitivePropertyAxiom;import org.semanticweb.owl.model.helper.OWLObjectVisitorAdapter;import qtag.Tagger;public class NLVisitor extends OWLObjectVisitorAdapter implements SwoopRenderingVisitor {	ShortFormProvider shortForms; 	StringWriter sw;	PrintWriter pw;	SwoopModel swoopModel;		static Tagger tagger;		//**** Define elements to create Natural Language Tree	NLTree tree;	NLNode parent;	NLLink linkContext;	HashMap nodeStore;	Hashtable hyperlinkMap;	OWLProperty linkProp;	static String LINK_EQUIVALENT = "is"; // NEW CHANGE	static String LINK_SUBCLASS = "is";	static String LINK_COMPLEMENT = "is not a";	static String LINK_INTERSECTION = "is";	static String LINK_UNION = "or a";	static String LINK_ALLVALUES = "that always";	static String LINK_SOMEVALUES = "that";	static String LINK_HASVALUE = "that";	static String LINK_MAXCARD = "with at most";	static String LINK_MINCARD = "with at least";	static String LINK_CARD = "with exactly";	static String LINK_ONEOF = "either";	static String LINK_BETWEEN = "with between";	//*****		public NLVisitor( ShortFormProvider shortForms, SwoopModel swoopModel )	{		try	{			tagger = new Tagger("lib/qtag-eng");		}		catch (IOException e)	{			System.out.println("Error: POS library not found!");		}		this.shortForms = shortForms;		this.swoopModel = swoopModel;		hyperlinkMap = new Hashtable();		reset();	}		/**	 * HTML-escape an object	 * @param o	 * @return	 */	private static String escape(Object o) {		return StringEscapeUtils.escapeHtml(o.toString());	}		public String result() {		return sw.toString();	}	public void reset() {		sw = new StringWriter();		pw = new PrintWriter( sw );			}		public void resetNLTree(String entityName, String link, int type) {		// set entity name as the root node of the tree		NLNode root = new NLNode(entityName, type);		// create a new NL tree with this root		this.tree = new NLTree(root);		// set current parent in NL-Visitor as root		this.parent = root;		// set current link (use argument passed as linktype)		this.linkContext = new NLLink("", link);		// used to reset parents during each iteration of a and/or/oneof		nodeStore = new HashMap();	}		public void resetParent() {		this.parent = tree.getRoot();	}		public void setLinkContext(String linkType) {		linkContext.setLinkType(linkType);		linkContext.setKeyword("");	}		public void visit( OWLClass clazz ) throws OWLException {		// build NL tree 		String className = getShortForm(clazz.getURI());		String tokens = getEntityTokens(className);        // add hyperlink		hyperlinkMap.put(tokens, "<a href=\"" + clazz.getURI() + "\">" + tokens + "</a>");		NLNode target = new NLNode(tokens, 0);				parent.addLink(linkContext, target);					    	}		public void visit( OWLIndividual ind ) throws OWLException {		// build NL tree 		String indName = getShortForm(ind.getURI());		String tokens = getEntityTokens(indName);        // add hyperlink		hyperlinkMap.put(tokens, "<a href=\"" + ind.getURI() + "\">" + tokens + "</a>");		NLNode target = new NLNode(tokens, 3);				parent.addLink(linkContext, target);			}		// TODO this is just object properties	public void visit( OWLObjectProperty prop ) throws OWLException {		// build NL tree 		String propName = getShortForm(prop.getURI());		String tokens = getEntityTokens(propName).toLowerCase();		String[] tok = getEntityTokens(propName).toLowerCase().split("( )+");		if ( !tokens.startsWith( "has"  ) && !tokens.startsWith( "is" )) {		    			// tagger.setInput(tokens);            String[] tags = tagger.tag(tok);            System.out.print(tokens + " TAG:"  );	                        for ( int i = 0; i < tags.length; i++ ) {                System.out.print( tags[i].toString() + " " );            }            System.out.println();                        if ( tok.length == 1 ) {//              TODO: there is a problem if the word is ambiguous between a noun and a verb                // for example: drives. a possible solution is to get the complete tagging and see if                // such ambiguity exists, and always prefer the verb form, since plural nouns in properties                 // are apparently rare                                // To solve this problems, strips 's' from word and checks if the resulting form can be a verb                if ( tags[0].equals( "NNS" ) ) {                    String possibleVerb = tok[0].substring( 0, tok[0].length() - 2 );                    String[] testTags = tagger.tag( tok );                    if ( testTags[0].startsWith( "V" ) ) {                        tok[0] = possibleVerb;                        tags = tagger.tag(tok);                        //System.out.print(tokens + " NEW TAG:"  );                    }                }                    			if( tags[0].startsWith( "VB" ) ){    			    if ( tags[0].equals( "VBN" ) ) {    			        tokens = "is " + tokens; // FOR TESTING NOW    			    } else if ( tags[0].startsWith( "VBD" ) ) {    			        tokens = "is " + tokens;    			    }                } else if ( tags[0].startsWith( "N" ) && !tags[0].endsWith( "S" ) ) {  // only singular nouns                    tokens = "has " + tokens;                } else {                     //tokens = "has " + tokens;                    // unidentified prop type?                }            } else {                 if ( tags[0].startsWith( "NN" ) ) {                    String possibleVerb = tok[0].substring( 0, tok[0].length() - 2 );                    String[] testTags = tagger.tag( tok );                    if ( testTags[0].startsWith( "V" ) ) {                        tok[0] = possibleVerb;                        tags = tagger.tag(tok);                    }                }                                int propClass = classifyComplexProp( tags, tok );                                System.err.println( propClass );                //              0 = complex np (just multiple nouns)			phone number            	// 1 = np and p 								child of            	// 2 = vp and np								produces wine            	// 3 = vp and p 								located in            	// 4 = vp and pp (p and np)						made from grape            	// -1 = other (unrecognized)					prop12                switch ( propClass ) {                	case 0:                 	    tokens = "has " + tokens;                	    break;            	    case 1:            	        tokens = "is a " + tokens;            	        break;            	    case 2:         	        case 3:         	        case 4:            	        if ( tags[0].equals( "VBN" ) ) {        			        tokens = "is " + tokens; // FOR TESTING NOW        			    } else if ( tags[0].startsWith( "VBD" ) ) {        			        tokens = "is " + tokens;        			    }            	        break;            	    case 5:            	        // unfortunately can't get here            	        String newTokens = new String( tok[0] + "a " );            	        for ( int i = 1; i < tok.length; i++ ) {            	            newTokens = newTokens.concat( tok[i] + " " );            	        }            	                    	        tokens = newTokens;            	                    	        break;                }                           }            		} else if ( tokens.startsWith( "is" ) && tok.length > 1 ) {		    String[] tags = tagger.tag(tok);            System.out.print(tokens + " TAG:"  );	                        for ( int i = 0; i < tags.length; i++ ) {                System.out.print( tags[i].toString() + " " );            }            System.out.println();		    		    int propClass = classifyComplexProp( tags, tok );                        if ( propClass == 5 ) {    	        String newTokens = new String( tok[0] + " a " );    	        for ( int i = 1; i < tok.length; i++ ) {    	            newTokens = newTokens.concat( tok[i] + " " );    	        }    	            	        tokens = newTokens;            }               		}				// add hyperlink		hyperlinkMap.put(tokens, "<a href=\"" + prop.getURI() + "\">" + tokens + "</a>");				linkContext.setKeyword(tokens);		linkProp = prop;	}				// TODO dataproperty is not the same as 	public void visit( OWLDataProperty prop ) throws OWLException {		// build NL tree 		String propName = getShortForm(prop.getURI());		//        String tokens = getEntityTokens(propName).toLowerCase();        String[] tok = getEntityTokens(propName).toLowerCase().split("( )+");		if (!tokens.startsWith("has") && !tokens.startsWith("is")) {			            String[] tags = tagger.tag(tok);            // System.out.println(tokens+" TAG:"+tags);				if(tags[0].startsWith("VB")){                tokens = "is "+tokens; // heuristic for verbs            }            else{               tokens = "has " + tokens; // heuristic for nouns            }		}				// add hyperlink		hyperlinkMap.put(tokens, "<a href=\"" + prop.getURI() + "\">" + tokens + "</a>");				linkContext.setKeyword(tokens);		linkProp = prop;	}		public void visit( OWLDataValue cd ) throws OWLException {				String dVal = " \"" + escape( cd.getValue() ) + "\"";		NLNode target = new NLNode(getEntityTokens(dVal), 3);		parent.addLink(linkContext, target);	}	public void visit( OWLAnd and ) throws OWLException {				if (linkProp!=null) 		    prefixDomain(linkProp); // suppose object of prop restr is a intersection				String saveCode = String.valueOf(this.parent.hashCode());		nodeStore.put(saveCode, parent);				boolean restoreLC = linkContext.isComplement(); // restore link complement for each intersection element				for ( Iterator it = and.getOperands().iterator();		it.hasNext(); ) {			linkContext.setLinkType(LINK_INTERSECTION);			linkContext.setKeyword("");			linkContext.setIsComplement(restoreLC);			OWLDescription desc = (OWLDescription) it.next();			desc.accept( this );			this.parent = (NLNode) nodeStore.get(saveCode); // reset parent at each iteration						// TESTING://			this.printTree();//			System.out.println();		}	}	public void visit( OWLOr or ) throws OWLException {				if (linkProp!=null) prefixDomain(linkProp); // suppose object of prop restr is a union		String saveCode = String.valueOf(this.parent.hashCode());		nodeStore.put(saveCode, parent);				boolean restoreLC = linkContext.isComplement(); // restore link complement for each union element				for ( Iterator it = or.getOperands().iterator();		it.hasNext(); ) {			linkContext.setLinkType(LINK_UNION);			linkContext.setKeyword("");			linkContext.setIsComplement(restoreLC);			OWLDescription desc = (OWLDescription) it.next();			desc.accept( this );			this.parent = (NLNode) nodeStore.get(saveCode); // reset parent at each iteration		}	}	public void visit( OWLNot not ) throws OWLException {				if (linkProp!=null) prefixDomain(linkProp); // suppose object of prop restr is a complement		linkContext.setIsComplement(true); // used to print "not" when object is a description		linkContext.setLinkType(NLVisitor.LINK_COMPLEMENT); // used to print "not" when object is a class		linkContext.setKeyword("");		OWLDescription desc = not.getOperand();		desc.accept( this );			}	public void visit( OWLEnumeration enumeration ) throws OWLException {			    System.out.println( "Visiting Enumeration" );	    		String saveCode = String.valueOf(this.parent.hashCode());		nodeStore.put(saveCode, parent);				boolean restoreLC = linkContext.isComplement(); // restore link complement for each oneof element				//NLLink originalLinkContext = linkContext;						String enumString = new String();				for ( Iterator it = enumeration.getIndividuals().iterator(); it.hasNext(); ) {			//linkContext.setLinkType(LINK_ONEOF);			//linkContext.setIsComplement(restoreLC);			OWLIndividual desc = (OWLIndividual) it.next();			//desc.accept( this );			//this.parent = (NLNode) nodeStore.get(saveCode); // reset parent at each iteration						String indName = getShortForm(desc.getURI());			String tokens = getEntityTokens(indName);			enumString = enumString.concat( indName + ";;;" );		}						NLNode enum_ = new NLNode( enumString, 4 ); // create dummy node ?		parent.addLink( linkContext, enum_ );	}	public void visit( OWLObjectSomeRestriction restriction ) throws OWLException {		prefixDomain(restriction.getObjectProperty());		linkContext.setLinkType(LINK_SOMEVALUES);		restriction.getObjectProperty().accept( this );		restriction.getDescription().accept( this );			}	public void visit( OWLObjectAllRestriction restriction ) throws OWLException {		prefixDomain(restriction.getObjectProperty());		linkContext.setLinkType(LINK_ALLVALUES);		restriction.getObjectProperty().accept( this );			restriction.getDescription().accept( this );	}	public void visit( OWLObjectValueRestriction restriction ) throws OWLException {		prefixDomain(restriction.getObjectProperty());						linkContext.setLinkType(LINK_HASVALUE);		restriction.getObjectProperty().accept( this );		restriction.getIndividual().accept( this );			}	public void visit( OWLDataSomeRestriction restriction ) throws OWLException {		prefixDomain(restriction.getDataProperty());		linkContext.setLinkType(LINK_SOMEVALUES);		restriction.getDataProperty().accept( this );		restriction.getDataType().accept( this );		

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?