📄 umlsambiguityontology.java
字号:
package dragon.nlp.ontology.umls;
import dragon.matrix.*;
import dragon.nlp.*;
import dragon.nlp.ontology.*;
import dragon.nlp.tool.*;
import dragon.util.*;
import java.io.File;
import java.util.*;
/**
* <p>UMLS ontology with sense disambiguation.</p>
* <p>Loads the token list, CUI list, token-CUI matrix and semantic network from a UMLS data directory.</p>
* <p>Copyright: Copyright (c) 2005</p>
* <p>Company: IST, Drexel University</p>
* @author Davis Zhou
* @version 1.0
*/
public class UmlsAmbiguityOntology extends UmlsOntology implements Ontology{
// Score thresholds passed to searchAllCandidates: minScore when searching for the
// top-level term, subtermMinScore when searching for terms nested inside it
// (the constructor sets them to 0.95 and 0.99).
private double minScore, subtermMinScore;
// Configurable through setMaxSkippedWords; the constructor sets it to 1.
// NOTE(review): presumably the number of words a match may skip -- its use is not visible here, confirm.
private int maxSkippedWords;
// Configurable through setMinSelectivity; the constructor sets it to 0.
// Its use is not visible in this part of the file.
private double minSelectivity;
// Token-CUI matrix loaded from index.list and tokencui.matrix in the data directory.
private SparseMatrix wtMatrix;
// Token list loaded from token.list.
private UmlsTokenList tokenList;
// CUI list loaded from cui.list; used to look up the semantic types of a CUI.
private UmlsCUIList cuiList;
// List obtained from cuiList.getListSortedByIndex(); generateTerm indexes into it by candidate index.
private ArrayList cuiListByIndex;
// Directory holding the UMLS data files.
private File directory;
// Semantic network built from semantictype.list and semanticrelation.list.
private UmlsSemanticNet snNet;
/**
* Creates the ontology from the default data directory, which is the
* Dragon home directory followed by <code>/nlpdata/umls</code>.
* @param lemmatiser the lemmatiser forwarded to the two-argument constructor
*/
public UmlsAmbiguityOntology(Lemmatiser lemmatiser) {
    this(EnvVariable.getDragonHome() + "/nlpdata/umls", lemmatiser);
}
/**
* Loads the UMLS resources (token-CUI matrix, token list, CUI list, semantic types and
* semantic relations) from the given directory and applies the default thresholds.
* If <code>workDir</code> does not exist as given but does exist under the Dragon home
* directory, the path under the Dragon home directory is used instead.
* Progress messages are written to standard output.
* @param workDir directory containing the UMLS data files
* @param lemmatiser the lemmatiser passed to the superclass constructor
*/
public UmlsAmbiguityOntology(String workDir, Lemmatiser lemmatiser) {
    super(lemmatiser);

    String homeRelativeDir;
    UmlsSTYList semanticTypes;
    UmlsRelationNet semanticRelations;

    // fall back to a path relative to the Dragon home when workDir itself is missing
    if (!FileUtil.exist(workDir)) {
        homeRelativeDir = EnvVariable.getDragonHome() + "/" + workDir;
        if (FileUtil.exist(homeRelativeDir)) {
            workDir = homeRelativeDir;
        }
    }
    directory = new File(workDir);

    System.out.println(new java.util.Date() + " Loading Token CUI Matrix...");
    wtMatrix = new DoubleSuperSparseMatrix(directory + "/index.list", directory + "/tokencui.matrix");
    tokenList = new UmlsTokenList(directory + "/token.list");
    cuiList = new UmlsCUIList(directory + "/cui.list");
    cuiListByIndex = cuiList.getListSortedByIndex();

    semanticTypes = new UmlsSTYList(directory + "/semantictype.list");
    semanticRelations = new UmlsRelationNet(directory + "/semanticrelation.list", semanticTypes);
    snNet = new UmlsSemanticNet(this, semanticTypes, semanticRelations);
    System.out.println(new java.util.Date() + " Ontology Loading Done!");

    // default matching parameters
    this.maxSkippedWords = 1;
    this.minScore = 0.95;
    this.subtermMinScore = 0.99;
    this.minSelectivity = 0;
}
/**
* Sets the minimum score used when searching candidates for a top-level term.
* @param minScore the new threshold
*/
public void setMinScore(double minScore) {
    this.minScore = minScore;
}
/**
* Returns the minimum score used when searching candidates for a top-level term.
* @return the current threshold
*/
public double getMinScore() {
    return this.minScore;
}
/**
* Sets the minimum selectivity value stored by this ontology.
* @param minSelectivity the new value
*/
public void setMinSelectivity(double minSelectivity) {
    this.minSelectivity = minSelectivity;
}
/**
* Returns the minimum selectivity value stored by this ontology.
* @return the current value
*/
public double getMinSelectivity() {
    return this.minSelectivity;
}
/**
* Sets the maximum-skipped-words setting.
* @param num the new value
*/
public void setMaxSkippedWords(int num) {
    this.maxSkippedWords = num;
}
/**
* Returns the maximum-skipped-words setting.
* @return the current value
*/
public int getMaxSkippedWords() {
    return this.maxSkippedWords;
}
/**
* Returns the semantic network that was built when this ontology was constructed.
* @return the semantic network
*/
public SemanticNet getSemanticNet() {
    return this.snNet;
}
/**
* Collects the semantic types of all the given CUIs into a single array.
* The types are gathered through a SortedArray; CUIs for which the
* single-CUI lookup returns null are skipped.
* @param cuis the concept identifiers to look up
* @return the collected semantic types, or null if none were collected
*/
public String[] getSemanticType(String[] cuis) {
    SortedArray collected = new SortedArray(3);
    String[] types;
    int cuiPos, typePos;

    for (cuiPos = 0; cuiPos < cuis.length; cuiPos++) {
        types = getSemanticType(cuis[cuiPos]);
        if (types == null) {
            continue;
        }
        for (typePos = 0; typePos < types.length; typePos++) {
            collected.add(types[typePos]);
        }
    }

    if (collected.size() <= 0) {
        return null;
    }

    types = new String[collected.size()];
    for (typePos = 0; typePos < collected.size(); typePos++) {
        types[typePos] = (String) collected.get(typePos);
    }
    return types;
}
/**
* Looks up the semantic types of a single CUI.
* @param cui the concept identifier to look up in the CUI list
* @return all semantic types of the CUI, or null if the CUI is not in the list
*/
public String[] getSemanticType(String cui) {
    UmlsCUI entry = cuiList.lookup(cui);
    return (entry != null) ? entry.getAllSTY() : null;
}
/**
* Not supported by this implementation: the lookup is not performed.
* @param term ignored
* @return always null
*/
public String[] getCUI(String term){
return null;
}
/**
* Not supported by this implementation: the lookup is not performed.
* @param starting ignored
* @param ending ignored
* @return always null
*/
public String[] getCUI(Word starting, Word ending){
return null;
}
/**
* Not supported by this implementation: no check is performed.
* @param term ignored
* @return always false
*/
public boolean isTerm(String term){
return false;
}
/**
* Not supported by this implementation: no check is performed.
* @param starting ignored
* @param ending ignored
* @return always false
*/
public boolean isTerm(Word starting, Word ending){
return false;
}
/**
* Finds all terms starting at the given word, with no ending word specified.
* Equivalent to <code>findAllTerms(start, null)</code>.
* @param start the word at which the search starts
* @return the list of terms, or null if no candidate was found
*/
public ArrayList findAllTerms(Word start) {
    return findAllTerms(start, null);
}
/**
* Finds the term starting at the given word together with the terms nested inside it.
* <p>Candidates starting at <code>start</code> are searched with <code>minScore</code> and
* each one is turned into a Term. The first Term of the returned list (built from the last
* candidate of the search result) is marked as the main concept and associated with its
* starting word; every other Term stays a sub-concept. Afterwards each following word, up to
* the ending word of the main Term, that is accepted by <code>isStartingWord</code> is
* searched with <code>subtermMinScore</code> and its candidates are appended as sub-concepts.</p>
* @param start the word at which the main term starts
* @param end the ending word handed to the candidate search; findAllTerms(Word) passes null
* @return the list of Term objects with the main term first, or null if the search from
* <code>start</code> yields no candidate
*/
public ArrayList findAllTerms(Word start, Word end) {
    ArrayList termList, canTermList;
    Term mainTerm;
    Word curWord;

    canTermList = searchAllCandidates(start, end, minScore);
    if (canTermList == null || canTermList.size() <= 0) {
        return null;
    }

    termList = new ArrayList();
    appendCandidatesAsSubTerms(canTermList, termList);

    // promote the first generated term to the main concept
    mainTerm = (Term) termList.get(0);
    mainTerm.setSubConcept(false);
    mainTerm.getStartingWord().setAssociatedConcept(mainTerm);

    // look for nested terms between the second word and the end of the main term
    end = mainTerm.getEndingWord();
    curWord = start.next;
    while (curWord != null && curWord.getPosInSentence() <= end.getPosInSentence()) {
        if (isStartingWord(curWord)) {
            canTermList = searchAllCandidates(curWord, end, subtermMinScore);
            if (canTermList != null) {
                appendCandidatesAsSubTerms(canTermList, termList);
            }
        }
        curWord = curWord.next;
    }
    return termList;
}

/**
* Converts every candidate, from the last to the first, into a sub-concept Term and appends
* it to termList. A candidate with more than one candidate CUI is disambiguated first when
* the sense disambiguation option is on.
*/
private void appendCandidatesAsSubTerms(ArrayList canTermList, ArrayList termList) {
    CandidateTerm canTerm;
    int i;

    for (i = canTermList.size() - 1; i >= 0; i--) {
        canTerm = (CandidateTerm) canTermList.get(i);
        if (getSenseDisambiguationOption() && canTerm.getCandidateCUINum() > 1) {
            canTerm = disambiguateCandidateTerm(canTerm);
        }
        termList.add(generateTerm(canTerm, true));
    }
}
/**
* Finds the term starting at the given word, with no ending word specified.
* Equivalent to <code>findTerm(start, null)</code>.
* @param start the word at which the search starts
* @return the term found, or null if no candidate was found
*/
public Term findTerm(Word start) {
    return findTerm(start, null);
}
/**
* Finds a single term starting at the given word.
* Candidates are searched with <code>minScore</code>; the last candidate of the search
* result is used and turned into a Term that is not a sub-concept.
* @param start the word at which the term starts
* @param end the ending word handed to the candidate search; findTerm(Word) passes null
* @return the generated term, or null if the search yields no candidate
*/
public Term findTerm(Word start, Word end) {
    ArrayList candidates;
    CandidateTerm chosen;
    boolean needsDisambiguation;

    candidates = searchAllCandidates(start, end, minScore);
    if (candidates == null || candidates.size() <= 0) {
        return null;
    }

    chosen = (CandidateTerm) candidates.get(candidates.size() - 1);

    // use contextual words to narrow down the sense
    needsDisambiguation = getSenseDisambiguationOption() && chosen.getCandidateCUINum() > 1;
    if (needsDisambiguation) {
        chosen = disambiguateCandidateTerm(chosen);
    }
    return generateTerm(chosen, false);
}
/**
* Builds a Term from a candidate term.
* <p>If one or more leading candidate CUIs score 1.0 or above, only those are kept;
* otherwise all candidate CUIs are kept. The kept CUIs become the candidate CUIs of the
* Term. A single CUI is set only when one CUI is kept or the first kept candidate scores
* strictly higher than the second. Candidate semantic types are looked up from the single
* CUI if it is set, otherwise from all candidate CUIs; when exactly one candidate type
* results it is also set as the Term's type.</p>
* @param canTerm the candidate term to convert
* @param isSubTerm whether the Term is marked as a sub-concept; a Term that is not a
* sub-concept is also registered as the associated concept of its starting word
* @return the generated Term
*/
private Term generateTerm(CandidateTerm canTerm, boolean isSubTerm) {
    int total, leading, kept, k, cuiIndex;
    String[] cuiNames;
    Term term;
    boolean singleSense;

    // count the leading candidates whose score is equal to or above 1.0;
    // if there are any, the remaining candidates are dropped
    total = canTerm.getCandidateCUINum();
    leading = 0;
    while (leading < total && canTerm.getCandidateCUI(leading).getScore() >= 1.0) {
        leading++;
    }
    kept = (leading > 0) ? leading : total;

    // resolve the kept candidates to their CUI strings
    cuiNames = new String[kept];
    for (k = 0; k < kept; k++) {
        cuiIndex = canTerm.getCandidateCUI(k).getIndex();
        cuiNames[k] = ((UmlsCUI) cuiListByIndex.get(cuiIndex)).toString();
    }

    term = new Term(canTerm.getStartingWord(), canTerm.getEndingWord());
    term.setSubConcept(isSubTerm);
    if (!term.isSubConcept()) {
        canTerm.getStartingWord().setAssociatedConcept(term);
    }

    // a single CUI is fixed only when the first candidate is alone or strictly outscores the second
    singleSense = kept <= 1
        || canTerm.getCandidateCUI(1).getScore() < canTerm.getCandidateCUI(0).getScore();
    if (singleSense) {
        term.setCUI(cuiNames[0]);
    }
    term.setCandidateCUI(cuiNames);

    if (term.getCUI() != null) {
        term.setCandidateTUI(getSemanticType(term.getCUI()));
    }
    else {
        term.setCandidateTUI(getSemanticType(term.getCandidateCUI()));
    }

    if (term.getCandidateTUINum() == 1) {
        term.setTUI(term.getCandidateTUI(0));
    }
    return term;
}
private CandidateTerm disambiguateCandidateTerm(CandidateTerm canTerm){
ArrayList contextList;
Word curWord;
int candidateNum, narrowedNum;
int i, j, index;
int[] arrCandidateCUI;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -