⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 umlsambiguityontology.java

📁 dragontoolkit用于机器学习
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package dragon.nlp.ontology.umls;

import dragon.matrix.*;
import dragon.nlp.*;
import dragon.nlp.ontology.*;
import dragon.nlp.tool.*;
import dragon.util.*;
import java.io.File;
import java.util.*;
/**
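 * <p>UMLS ontology with sense disambiguation.</p>
 * <p>Loads the token/CUI matrix, the token and CUI lists, and the semantic
 * network from a UMLS data directory when it is constructed.</p>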
 * <p>Copyright: Copyright (c) 2005</p>
 * <p>Company: IST, Drexel University</p>
 * @author Davis Zhou
 * @version 1.0
 */

public class UmlsAmbiguityOntology extends UmlsOntology implements Ontology{
    // minScore is the threshold passed to searchAllCandidates for the main term
    // search; subtermMinScore is the threshold used when findAllTerms searches
    // for terms starting inside an already matched term.
    private double minScore, subtermMinScore;
    // Initialised to 1 by the constructor; exposed through a getter/setter.
    // NOTE(review): presumably consumed by the candidate search, which is not
    // visible in this part of the file - confirm.
    private int maxSkippedWords;
    // Initialised to 0 by the constructor; exposed through a getter/setter.
    private double minSelectivity;

    // Sparse matrix loaded from "index.list" and "tokencui.matrix" in the work directory.
    private SparseMatrix wtMatrix;
    // Token list loaded from "token.list".
    private UmlsTokenList tokenList;
    // CUI list loaded from "cui.list"; used to look up a CUI's semantic types.
    private UmlsCUIList cuiList;
    // The same CUIs as returned by cuiList.getListSortedByIndex(); generateTerm
    // reads it with the index of a candidate CUI.
    private ArrayList cuiListByIndex;
    // Directory the UMLS data files are read from.
    private File directory;
    // Semantic network built in the constructor and returned by getSemanticNet().
    private UmlsSemanticNet snNet;

    /**
     * Creates the ontology from the default data directory,
     * <code>nlpdata/umls</code> below the Dragon home directory.
     *
     * @param lemmatiser the lemmatiser forwarded to the two-argument constructor
     */
    public UmlsAmbiguityOntology(Lemmatiser lemmatiser) {
        this(EnvVariable.getDragonHome() + "/nlpdata/umls", lemmatiser);
    }

    /**
     * Creates the ontology from the UMLS data files found in a directory.
     * <p>If <code>workDir</code> does not exist as given but does exist below the
     * Dragon home directory, the path below Dragon home is used instead. Progress
     * messages are printed to standard output before and after loading.</p>
     *
     * @param workDir    directory holding index.list, tokencui.matrix, token.list,
     *                   cui.list, semantictype.list and semanticrelation.list
     * @param lemmatiser the lemmatiser passed to the superclass constructor
     */
    public UmlsAmbiguityOntology(String workDir, Lemmatiser lemmatiser) {
        super(lemmatiser);

        // Resolve the directory relative to Dragon home only when the plain path is missing.
        if (!FileUtil.exist(workDir)) {
            String underHome = EnvVariable.getDragonHome() + "/" + workDir;
            if (FileUtil.exist(underHome)) {
                workDir = underHome;
            }
        }
        this.directory = new File(workDir);
        // File.toString() yields getPath(), so this is the prefix the data files share.
        String base = directory.getPath();

        System.out.println(new java.util.Date() +" Loading Token CUI Matrix...");
        wtMatrix = new DoubleSuperSparseMatrix(base + "/index.list", base + "/tokencui.matrix");
        tokenList = new UmlsTokenList(base + "/token.list");
        cuiList = new UmlsCUIList(base + "/cui.list");
        cuiListByIndex = cuiList.getListSortedByIndex();

        UmlsSTYList semanticTypes = new UmlsSTYList(base + "/semantictype.list");
        UmlsRelationNet relations = new UmlsRelationNet(base + "/semanticrelation.list", semanticTypes);
        snNet = new UmlsSemanticNet(this, semanticTypes, relations);
        System.out.println(new java.util.Date() +" Ontology Loading Done!");

        // Default matching parameters.
        maxSkippedWords = 1;
        minScore = 0.95;
        subtermMinScore = 0.99;
        minSelectivity = 0;
    }

    /**
     * Sets the minimum score that findTerm and findAllTerms pass to the
     * candidate search for the main term.
     *
     * @param minScore the new threshold (the constructor default is 0.95)
     */
    public void setMinScore(double minScore) {
        this.minScore = minScore;
    }

    /**
     * Gets the minimum score used for the main term search.
     *
     * @return the current minimum score
     */
    public double getMinScore() {
        return this.minScore;
    }

    /**
     * Sets the minimum selectivity.
     *
     * @param minSelectivity the new value (the constructor default is 0)
     */
    public void setMinSelectivity(double minSelectivity) {
        this.minSelectivity = minSelectivity;
    }

    /**
     * Gets the minimum selectivity.
     *
     * @return the current minimum selectivity
     */
    public double getMinSelectivity() {
        return this.minSelectivity;
    }

    /**
     * Sets the maximum number of skipped words.
     *
     * @param num the new limit (the constructor default is 1)
     */
    public void setMaxSkippedWords(int num) {
        this.maxSkippedWords = num;
    }

    /**
     * Gets the maximum number of skipped words.
     *
     * @return the current limit
     */
    public int getMaxSkippedWords() {
        return this.maxSkippedWords;
    }

    /**
     * Gets the semantic network that was built when this ontology was constructed.
     *
     * @return the UMLS semantic network
     */
    public SemanticNet getSemanticNet() {
        return this.snNet;
    }

    /**
     * Collects the semantic types of several CUIs into one array.
     * <p>The types of each CUI are looked up with {@link #getSemanticType(String)}
     * and added to a <code>SortedArray</code>; CUIs for which the lookup returns
     * null are skipped. The result holds the elements of that array in its order.</p>
     *
     * @param cuis the CUIs to look up; must not be null
     * @return the collected semantic types, or null if none were collected
     */
    public String[] getSemanticType(String[] cuis) {
        SortedArray merged = new SortedArray(3);

        for (int c = 0; c < cuis.length; c++) {
            String[] types = getSemanticType(cuis[c]);
            if (types == null) {
                continue;
            }
            for (int t = 0; t < types.length; t++) {
                merged.add(types[t]);
            }
        }

        int count = merged.size();
        if (count <= 0) {
            return null;
        }

        String[] result = new String[count];
        for (int k = 0; k < count; k++) {
            result[k] = (String) merged.get(k);
        }
        return result;
    }

    /**
     * Looks up the semantic types of a single CUI.
     *
     * @param cui the CUI to look up in the CUI list
     * @return all semantic types of the CUI, or null if the CUI is not in the list
     */
    public String[] getSemanticType(String cui) {
        UmlsCUI entry = cuiList.lookup(cui);
        return (entry == null) ? null : entry.getAllSTY();
    }

    /**
     * This implementation performs no lookup.
     *
     * @param term ignored
     * @return always null
     */
    public String[] getCUI(String term) {
        return null;
    }

    /**
     * This implementation performs no lookup.
     *
     * @param starting ignored
     * @param ending   ignored
     * @return always null
     */
    public String[] getCUI(Word starting, Word ending) {
        return null;
    }

    /**
     * This implementation performs no check.
     *
     * @param term ignored
     * @return always false
     */
    public boolean isTerm(String term) {
        return false;
    }

    /**
     * This implementation performs no check.
     *
     * @param starting ignored
     * @param ending   ignored
     * @return always false
     */
    public boolean isTerm(Word starting, Word ending) {
        return false;
    }

    /**
     * Finds all terms beginning at a word, without an explicit end word.
     * Equivalent to <code>findAllTerms(start, null)</code>.
     *
     * @param start the word the main term must start at
     * @return the list of terms, or null if no candidate was found
     */
    public ArrayList findAllTerms(Word start) {
        return findAllTerms(start, null);
    }

    /**
     * Finds the terms starting at <code>start</code> and the terms that start at
     * later words up to the end of the main term.
     * <p>Candidates for <code>start</code> are searched with the minimum score and
     * converted to terms from the last candidate to the first. The first term
     * added to the result is then marked as not being a sub-concept and is
     * associated with its starting word. After that, every following word up to
     * that term's ending word which <code>isStartingWord</code> accepts is searched
     * with the sub-term minimum score, and the resulting terms are appended as
     * sub-concepts. Candidates with more than one CUI are disambiguated first when
     * the sense disambiguation option is on.</p>
     *
     * @param start the word the main term must start at
     * @param end   the end word handed to the candidate search; may be null
     * @return the list of <code>Term</code> objects, or null if the main search
     *         yields no candidate
     */
    public ArrayList findAllTerms(Word start, Word end) {
        ArrayList candidates = searchAllCandidates(start, end, minScore);
        if (candidates == null || candidates.size() <= 0) {
            return null;
        }

        ArrayList found = new ArrayList();
        CandidateTerm candidate;
        int idx;

        // Terms starting at the first word, walking the candidate list backwards.
        for (idx = candidates.size() - 1; idx >= 0; idx--) {
            candidate = (CandidateTerm) candidates.get(idx);
            if (getSenseDisambiguationOption() && candidate.getCandidateCUINum() > 1) {
                candidate = disambiguateCandidateTerm(candidate);
            }
            found.add(generateTerm(candidate, true));
        }

        // The first generated term becomes the main (non-sub) concept.
        Term primary = (Term) found.get(0);
        primary.setSubConcept(false);
        primary.getStartingWord().setAssociatedConcept(primary);
        end = primary.getEndingWord();

        // Terms starting at the following words, bounded by the main term's last word.
        for (Word cursor = start.next;
             cursor != null && cursor.getPosInSentence() <= end.getPosInSentence();
             cursor = cursor.next) {
            if (!isStartingWord(cursor)) {
                continue;
            }
            candidates = searchAllCandidates(cursor, end, subtermMinScore);
            if (candidates == null) {
                continue;
            }
            for (idx = candidates.size() - 1; idx >= 0; idx--) {
                candidate = (CandidateTerm) candidates.get(idx);
                if (getSenseDisambiguationOption() && candidate.getCandidateCUINum() > 1) {
                    candidate = disambiguateCandidateTerm(candidate);
                }
                found.add(generateTerm(candidate, true));
            }
        }
        return found;
    }

    /**
     * Finds the term beginning at a word, without an explicit end word.
     * Equivalent to <code>findTerm(start, null)</code>.
     *
     * @param start the word the term must start at
     * @return the term, or null if no candidate was found
     */
    public Term findTerm(Word start) {
        return findTerm(start, null);
    }

    /**
     * Finds a single term starting at <code>start</code>.
     * <p>The last entry of the candidate list returned by the candidate search is
     * used. If the sense disambiguation option is on and that candidate has more
     * than one CUI, it is disambiguated before the term is generated.</p>
     *
     * @param start the word the term must start at
     * @param end   the end word handed to the candidate search; may be null
     * @return the generated (non-sub-concept) term, or null if there is no candidate
     */
    public Term findTerm(Word start, Word end) {
        ArrayList candidates = searchAllCandidates(start, end, minScore);

        if (candidates != null && candidates.size() > 0) {
            CandidateTerm last = (CandidateTerm) candidates.get(candidates.size() - 1);

            // Use contextual words to narrow down the sense.
            boolean needsDisambiguation =
                getSenseDisambiguationOption() && last.getCandidateCUINum() > 1;
            if (needsDisambiguation) {
                last = disambiguateCandidateTerm(last);
            }
            return generateTerm(last, false);
        }
        return null;
    }

    /**
     * Builds a <code>Term</code> from a candidate term.
     * <p>If the leading candidate CUIs have a score of 1.0 or more, only those are
     * kept; otherwise all candidate CUIs are kept. The kept CUIs become the term's
     * candidate CUIs. The term's CUI is set to the first one when at most one is
     * kept or when the second candidate scores strictly lower than the first.
     * Candidate semantic types are taken from the term's CUI if it is set and from
     * all candidate CUIs otherwise; a single candidate type also becomes the
     * term's TUI. A term that is not a sub-concept is associated with the
     * candidate's starting word.</p>
     * <p>NOTE(review): the first kept CUI is read unconditionally when at most one
     * is kept, so this assumes the candidate has at least one CUI - confirm
     * against the candidate search.</p>
     *
     * @param canTerm   the candidate term to convert
     * @param isSubTerm whether the new term is flagged as a sub-concept
     * @return the generated term
     */
    private Term generateTerm(CandidateTerm canTerm, boolean isSubTerm) {
        int total = canTerm.getCandidateCUINum();

        // Count the leading candidates whose score is 1.0 or above; when there
        // are any, the remaining candidates are dropped.
        int kept = 0;
        while (kept < total && canTerm.getCandidateCUI(kept).getScore() >= 1.0) {
            kept++;
        }
        if (kept == 0) {
            kept = total;
        }

        // Translate the kept candidate indices into CUI strings.
        String[] cuiNames = new String[kept];
        for (int k = 0; k < kept; k++) {
            cuiNames[k] = ((UmlsCUI) cuiListByIndex.get(canTerm.getCandidateCUI(k).getIndex())).toString();
        }

        Term term = new Term(canTerm.getStartingWord(), canTerm.getEndingWord());
        term.setSubConcept(isSubTerm);
        if (!term.isSubConcept()) {
            canTerm.getStartingWord().setAssociatedConcept(term);
        }

        // A single best CUI exists when there is no tie between the top two candidates.
        boolean hasUniqueBest = kept <= 1
            || canTerm.getCandidateCUI(1).getScore() < canTerm.getCandidateCUI(0).getScore();
        if (hasUniqueBest) {
            term.setCUI(cuiNames[0]);
        }
        term.setCandidateCUI(cuiNames);

        String[] candidateTypes;
        if (term.getCUI() == null) {
            candidateTypes = getSemanticType(term.getCandidateCUI());
        } else {
            candidateTypes = getSemanticType(term.getCUI());
        }
        term.setCandidateTUI(candidateTypes);

        if (term.getCandidateTUINum() == 1) {
            term.setTUI(term.getCandidateTUI(0));
        }
        return term;
    }

    private CandidateTerm disambiguateCandidateTerm(CandidateTerm canTerm){
        ArrayList contextList;
        Word curWord;
        int candidateNum, narrowedNum;
        int i, j, index;
        int[] arrCandidateCUI;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -