umlsambiguityontology.java
来自「dragontoolkit用于机器学习」· Java 代码 · 共 506 行 · 第 1/2 页
JAVA
506 行
double[] arrCandidateScore;
double score;
candidateNum=canTerm.getCandidateCUINum();
arrCandidateCUI=new int[candidateNum];
arrCandidateScore=new double[candidateNum];
for(i=0;i<candidateNum;i++)
{
arrCandidateCUI[i] = canTerm.getCandidateCUI(i).getIndex();
arrCandidateScore[i] = canTerm.getCandidateCUI(i).getScore();
}
contextList = generateContextWindow(canTerm.getStartingWord().getParent(), canTerm.getStartingWord(), canTerm.getEndingWord());
for(i=0;i<contextList.size();i++)
{
curWord=(Word)contextList.get(i);
index = getIndexInTokenList(curWord);
if (index<0) continue;
narrowedNum = 0;
for (j = 0; j < candidateNum; j++) {
if ( (score = wtMatrix.getDouble(index, arrCandidateCUI[j])) > 0) {
arrCandidateCUI[narrowedNum] = arrCandidateCUI[j];
arrCandidateScore[narrowedNum] = arrCandidateScore[j] + score;
narrowedNum += 1;
}
}
if (narrowedNum > 0) candidateNum = narrowedNum;
}
if(candidateNum<canTerm.getCandidateCUINum())
canTerm=buildCandidateTerm(canTerm.getStartingWord(),canTerm.getEndingWord(),arrCandidateCUI,arrCandidateScore,candidateNum,minScore);
contextList.clear();
return canTerm;
}
/**
 * Collects the candidate terms that begin at {@code start}.
 *
 * The single-word term {@code start..start} is tried first. The scan then walks the
 * following words up to {@code end}; every word that keeps at least one candidate
 * column alive in {@code wtMatrix} yields a further, longer candidate term.
 *
 * @param start    first word of every candidate term
 * @param end      right boundary of the scan; when null it is derived from the words after start
 * @param minScore minimum score handed on to buildCandidateTerm
 * @return the list of CandidateTerm objects found (possibly empty), or null when
 *         start has no token index or its matrix row has no non-zero column
 */
private ArrayList searchAllCandidates(Word start, Word end, double minScore){
    int rowIndex=getIndexInTokenList(start);
    if(rowIndex<0)
        return null;

    Sentence sentence=start.getParent();

    // Set the right boundary of the possible term when the caller did not give one.
    if(end==null){
        Word probe=start.next;
        int counted=0;
        // Look ahead over at most four non-punctuation words, stopping early at a boundary word.
        while(counted<4 && probe!=null && end==null){
            if(isBoundaryWord(probe))
                end=probe.prev;
            if(!probe.isPunctuation())
                counted++;
            probe=probe.next;
        }
        if(probe==null)
            probe=sentence.getLastWord();
        if(end==null)
            end=probe;
    }

    // Number of non-zero columns in the row of the starting word.
    int remaining=wtMatrix.getNonZeroNumInRow(rowIndex);
    if(remaining<=0)
        return null;

    // Non-zero column indexes (the CUIs that contain the word) and their scores.
    // NOTE(review): both arrays are compacted in place below; if wtMatrix hands out its
    // internal arrays rather than copies, this would alter the matrix -- confirm.
    int[] cuiColumns=wtMatrix.getNonZeroColumnsInRow(rowIndex);
    double[] cuiScores=wtMatrix.getNonZeroDoubleScoresInRow(rowIndex);

    ArrayList found=new ArrayList(3);
    CandidateTerm term=buildCandidateTerm(start,start,cuiColumns,cuiScores,remaining,minScore);
    if(term!=null)
        found.add(term);

    // Extend the term word by word within the window.
    int skipped=0;
    for(Word cur=start.next;
        cur!=null && skipped<=maxSkippedWords && cur.getPosInSentence()<=end.getPosInSentence();
        cur=cur.next){

        // Words that are not useful for a term are passed over without counting as skipped.
        if(!isUsefulForTerm(cur))
            continue;

        rowIndex=getIndexInTokenList(cur);
        if(rowIndex<0){
            skipped++;
            continue;
        }

        // Keep only the candidates that also have a positive weight for the current word.
        int kept=0;
        for(int k=0;k<remaining;k++){
            double weight=wtMatrix.getDouble(rowIndex,cuiColumns[k]);
            if(weight>0){
                cuiColumns[kept]=cuiColumns[k];
                cuiScores[kept]=cuiScores[k]+weight;
                kept++;
            }
        }

        if(kept<=0){
            // No candidate matches this word: the candidate set is left as it was.
            skipped++;
            continue;
        }

        remaining=kept;
        term=buildCandidateTerm(start,cur,cuiColumns,cuiScores,remaining,minScore);
        if(term!=null)
            found.add(term);
        skipped=0;
    }
    return found;
}
/*The following function generates the contextual words for a term specified by its starting word and ending word.
We take up to three left words (noun or adjective) and up to three right words (noun or adjective) as the window.*/
/**
 * Builds the context window of a term delimited by its starting and ending word.
 *
 * Up to three nouns/adjectives to the left of {@code start} are collected first
 * (nearest first), followed by up to three nouns/adjectives to the right of
 * {@code end} (nearest first).
 *
 * @param sent  sentence holding the term; not consulted here, neighbours are reached
 *              through the prev/next links of the words
 * @param start first word of the term
 * @param end   last word of the term; when null the term is treated as the single word start
 * @return list of the context Word objects, at most six entries
 */
private ArrayList generateContextWindow(Sentence sent, Word start, Word end) {
    ArrayList contexts = new ArrayList(6);
    Word cur;
    int taken;

    // Left side: walk backwards from the word before the term.
    cur = start.prev;
    taken = 0;
    while (taken < 3 && cur != null) {
        if (cur.getPOSIndex() == Tagger.POS_NOUN || cur.getPOSIndex() == Tagger.POS_ADJECTIVE) {
            contexts.add(cur);
            taken++;
        }
        cur = cur.prev;
    }

    // Right side: begin after the END of the term. Starting at start.next would count the
    // words inside a multi-word term as that term's own context.
    Word rightAnchor = (end != null) ? end : start;
    cur = rightAnchor.next;
    taken = 0;
    while (taken < 3 && cur != null) {
        if (cur.getPOSIndex() == Tagger.POS_NOUN || cur.getPOSIndex() == Tagger.POS_ADJECTIVE) {
            contexts.add(cur);
            taken++;
        }
        cur = cur.next;
    }
    return contexts;
}
/**
 * Wraps the span {@code starting..ending} and its surviving candidates into a CandidateTerm.
 *
 * @param starting          first word of the term
 * @param ending            last word of the term
 * @param arrCandidateCUI   candidate indexes; only the first candidateNum entries are read
 * @param arrCandidateScore scores parallel to arrCandidateCUI
 * @param candidateNum      number of valid entries in the two arrays
 * @param minScore          candidates scoring below this value are left out
 * @return the new term, or null when the span is rejected or no candidate reaches minScore
 */
private CandidateTerm buildCandidateTerm(Word starting, Word ending, int[] arrCandidateCUI, double[] arrCandidateScore, int candidateNum,double minScore){
    int endingPos=ending.getPOSIndex();

    // A term ending in an adjective is accepted only when adjective terms are enabled
    // and the term consists of that single word.
    if(endingPos==Tagger.POS_ADJECTIVE){
        if(!(getAdjectiveTermOption() && ending.equals(starting)))
            return null;
    }

    // A lone number is never a term.
    if(endingPos==Tagger.POS_NUM && ending.equals(starting))
        return null;

    // Too many competing candidates: selectivity 1/candidateNum falls below the threshold.
    if(1.0/candidateNum<minSelectivity)
        return null;

    CandidateTerm built=new CandidateTerm(starting, ending);
    for(int k=0;k<candidateNum;k++){
        double candidateScore=arrCandidateScore[k];
        if(candidateScore>=minScore)
            built.addCandidateCUI(new CandidateCUI(arrCandidateCUI[k],candidateScore));
    }

    if(built.getCandidateCUINum()<=0)
        return null;

    // Adjective terms additionally need their first candidate to score at least 1.
    // NOTE(review): presumably the first entry is the best-scoring one -- depends on SortedArray ordering, confirm.
    if(endingPos==Tagger.POS_ADJECTIVE && built.getCandidateCUI(0).getScore()<1)
        return null;

    return built;
}
/**
 * Returns the token-list index stored on the word, looking it up on first use.
 *
 * A stored index of Integer.MIN_VALUE triggers a lookup of the word's lemma in
 * tokenList; the result (or -1 when the lemma is not found) is written back to the word.
 *
 * @param word the word to resolve
 * @return the word's index after resolution; -1 when the lemma is not in tokenList
 */
private int getIndexInTokenList(Word word){
    if(word.getIndex()!=Integer.MIN_VALUE)
        return word.getIndex();

    Token token=tokenList.lookup(getLemma(word));
    word.setIndex(token==null ? -1 : token.getIndex());
    return word.getIndex();
}
/**
 * A candidate concept: an integer index paired with a score.
 *
 * Ordering puts higher scores first; equal scores are ordered by ascending index.
 */
private class CandidateCUI implements Comparable{
    private int index;
    private double score;

    public CandidateCUI(int index, double score){
        this.index=index;
        this.score=score;
    }

    /** @return the score given at construction */
    public double getScore(){
        return score;
    }

    /** @return the index given at construction */
    public int getIndex(){
        return index;
    }

    /**
     * Compares by descending score, then by ascending index.
     *
     * @param obj another CandidateCUI
     * @return negative when this candidate sorts first, positive when it sorts later, 0 when both fields match
     */
    public int compareTo(Object obj){
        CandidateCUI other=(CandidateCUI)obj;

        double otherScore=other.getScore();
        if(score>otherScore)
            return -1;
        if(score<otherScore)
            return 1;

        // Scores do not order the pair: fall back to the index.
        int otherIndex=other.getIndex();
        if(index==otherIndex)
            return 0;
        return index>otherIndex ? 1 : -1;
    }
}
/**
 * A span of words (starting..ending) together with the candidate CUIs collected for it.
 * Candidates are held in a SortedArray.
 */
private class CandidateTerm {
    Word starting;
    Word ending;
    SortedArray candidates=new SortedArray();

    public CandidateTerm(Word starting, Word ending){
        this.starting=starting;
        this.ending=ending;
    }

    /** Adds one candidate to this term's candidate collection. */
    public void addCandidateCUI(CandidateCUI cui){
        this.candidates.add(cui);
    }

    /** @return the first word of the term */
    public Word getStartingWord(){
        return this.starting;
    }

    /** @return the last word of the term */
    public Word getEndingWord(){
        return this.ending;
    }

    /** @return the candidate stored at the given position of the candidate collection */
    public CandidateCUI getCandidateCUI(int index){
        Object entry=this.candidates.get(index);
        return (CandidateCUI)entry;
    }

    /** @return how many candidates this term currently holds */
    public int getCandidateCUINum(){
        return this.candidates.size();
    }
}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?