📄 taglink.java
字号:
}
sortCandidateList(candidateList);
// iterate the candidate list
buff.append("\nCommon tokens (Algorithm 1):\n");
buff.append("Ti\tUj\tSij*Xij\n");
float score = 0.0f;
HashMap<Integer,Object> tMap = new HashMap<Integer, Object>();
HashMap<Integer,Object> uMap = new HashMap<Integer, Object>();
for (Object aCandidateList : candidateList) {
Candidates actualCandidates = (Candidates) aCandidateList;
Integer tPos = actualCandidates.getTPos();
Integer uPos = actualCandidates.getUPos();
if ((!tMap.containsKey(tPos)) &&
(!uMap.containsKey(uPos))) {
float tokenScore = actualCandidates.getScore();
score += tokenScore;
tMap.put(tPos, null);
uMap.put(uPos, null);
buff.append(tTokens[(tPos)]).append("\t").append(uTokens[(uPos)]).append("\t").append(round(tokenScore)).append("\n");
}
}
buff.append("\nS(T,U)=").append(round(score)).append("\n");
}
return buff.toString();
}
/**
 * Builds the list of scored candidate token pairs between T and U.
 *
 * <p>Every T token is compared against U tokens in order. A pair's score is
 * the character-based similarity of the two tokens multiplied by a weight,
 * where the weight is the mean of (a) the product of the two tokens' IDF
 * weights and (b) the matched length divided by the minimum string size.
 *
 * <p>To reduce cost, not every pair is evaluated: once an exact match
 * (similarity 1.0) has been found for a T token, scanning U stops as soon as
 * the positional distance {@code |t - u|} exceeds the distance of that match.
 *
 * @param tTokens   tokens of string T
 * @param uTokens   tokens of string U
 * @param tIdfArray weight of each T token, indexed like {@code tTokens}
 * @param uIdfArray weight of each U token, indexed like {@code uTokens}
 * @return the candidate pairs, in the order they were evaluated
 */
private ArrayList<Candidates> obtainCandidateList(String[] tTokens, String[] uTokens,
        float[] tIdfArray, float[] uIdfArray) {
    final ArrayList<Candidates> pairs = new ArrayList<Candidates>();
    final float shortestSize = getMinStringSize(tTokens, uTokens);
    for (int t = 0; t < tTokens.length; t++) {
        // Positional distance of the latest exact match for this T token; -1 until one is found.
        int exactMatchDistance = -1;
        for (int u = 0; u < uTokens.length; u++) {
            final int distance = Math.abs(t - u);
            if (exactMatchDistance >= 0 && exactMatchDistance < distance) {
                // Farther away than an exact match already seen: stop scanning U for this token.
                break;
            }
            final float similarity = characterBasedStringMetric.getSimilarity(tTokens[t], uTokens[u]);
            // Written as a negated >= so that a NaN similarity is skipped as well.
            if (!(similarity >= 0.0f)) {
                continue;
            }
            final boolean exactMatch = (similarity == 1.0f);
            // NOTE(review): getMatched() is read right after getSimilarity(); presumably it
            // reports the matched length of that last comparison - confirm in TagLinkToken.
            final float matched = exactMatch
                    ? tTokens[t].length()
                    : ((TagLinkToken) characterBasedStringMetric).getMatched();
            final float matchedWeight = matched / shortestSize;
            final float tfidfWeight = tIdfArray[t] * uIdfArray[u];
            final float weight = (tfidfWeight + matchedWeight) / 2.0f;
            if (exactMatch) {
                exactMatchDistance = distance;
            }
            pairs.add(new Candidates(t, u, similarity * weight));
        }
    }
    return pairs;
}
/**
 * Sorts the candidate token pairs in place, highest score first.
 *
 * <p>Scores are compared with {@link Float#compare(float, float)}, which
 * defines a total order. A NaN score therefore cannot violate the comparator
 * contract; NaN scores sort ahead of all other values. The sort is stable, so
 * candidates with equal scores keep their relative order.
 *
 * @param list candidate pairs to sort; the list itself is reordered
 */
private void sortCandidateList(ArrayList<Candidates> list) {
    java.util.Collections.sort(list, new java.util.Comparator<Candidates>() {
        @Override
        public int compare(Candidates first, Candidates second) {
            // Arguments are swapped to obtain descending order.
            return Float.compare(second.getScore(), first.getScore());
        }
    });
}
/**
 * Builds the normalised weight vector for an array of tokens.
 *
 * <p>When no IDF map is available, every token receives the uniform weight
 * {@code 1 / sqrt(n)}. Otherwise each token's weight is looked up in
 * {@code idfMap} (tokens missing from the map count as 0) and the vector is
 * divided by its Euclidean norm. If that norm is zero the all-zero vector is
 * returned as is, rather than dividing 0 by 0 and filling it with NaN.
 *
 * @param tokenArray tokens to weight
 * @return one weight per token, indexed like {@code tokenArray}
 */
private float[] getIDFArray(String[] tokenArray) {
    final int tokenArrayLength = tokenArray.length;
    final float[] idfArray = new float[tokenArrayLength];
    if (idfMap == null) {
        final float cosineWeight = 1.0f / ((float) Math.sqrt(tokenArrayLength));
        for (int i = 0; i < tokenArrayLength; i++) {
            idfArray[i] = cosineWeight;
        }
        return idfArray;
    }
    float sumOfSquares = 0f;
    for (int i = 0; i < tokenArrayLength; i++) {
        // A token that is not in the map has no weight; check for null
        // explicitly instead of relying on the unboxing NullPointerException.
        final Float knownWeight = idfMap.get(tokenArray[i]);
        final float idfWeight = (knownWeight == null) ? 0.0f : knownWeight;
        idfArray[i] = idfWeight;
        sumOfSquares += idfWeight * idfWeight;
    }
    final float norm = (float) Math.sqrt(sumOfSquares);
    if (norm != 0.0f) {
        for (int i = 0; i < tokenArrayLength; i++) {
            idfArray[i] = idfArray[i] / norm;
        }
    }
    return idfArray;
}
/**
 * Returns the long identifier of this metric, which is the same text as the
 * short identifier.
 *
 * @return the value of {@link #getShortDescriptionString()}
 */
public String getLongDescriptionString() {
    return this.getShortDescriptionString();
}
/**
 * Returns the name of this metric together with its inner character-based
 * metric. The name carries an {@code IDF_} marker when an IDF map is in use.
 *
 * @return {@code "[TagLink_["} or {@code "[TagLink_IDF_["}, followed by the
 *         inner metric's {@code toString()} and a closing {@code "]"}
 */
public String getShortDescriptionString() {
    // NOTE(review): only one of the two opening brackets is closed; the text is
    // kept exactly as it was in case callers depend on it.
    final String prefix = (idfMap == null) ? "[TagLink_[" : "[TagLink_IDF_[";
    return prefix + characterBasedStringMetric.toString() + "]";
}
/**
 * Computes an IDF weight for every distinct token of the data set.
 *
 * <p>Each row is tokenised with {@code tokeniser}. A token's document
 * frequency is the number of rows that contain it at least once, and its
 * weight is {@code ln(N / documentFrequency + 1)}, where {@code N} is the
 * number of rows.
 *
 * @param dataSetArray rows of the data set
 * @return map from token to IDF weight; empty when the data set has no rows
 */
private HashMap<String,Float> getIDFMap(String[] dataSetArray) {
    final float rowCount = dataSetArray.length;
    final HashMap<String, Float> weights = new HashMap<String, Float>();

    // Pass 1: count, for each token, the number of rows it occurs in.
    for (String row : dataSetArray) {
        final Vector<String> rowTokens = tokeniser.tokenize(row);
        final java.util.Set<String> seenInRow = new java.util.HashSet<String>();
        for (String token : rowTokens) {
            // add() is true only for the first occurrence in this row, so a
            // token repeated within one row is still counted once.
            if (seenInRow.add(token)) {
                weights.put(token, getFrequency(token, weights) + 1.0f);
            }
        }
    }

    // Pass 2: replace each document frequency by its IDF weight.
    for (Map.Entry<String, Float> entry : weights.entrySet()) {
        final float documentFrequency = entry.getValue();
        entry.setValue((float) Math.log((rowCount / documentFrequency) + 1.0f));
    }
    return weights;
}
/**
 * Looks up the value stored for a word, treating a missing entry as zero.
 *
 * @param word key to look up
 * @param map  map to read from
 * @return the value stored for {@code word}, or 0 when the map returns
 *         {@code null} for it
 */
private float getFrequency(String word, Map<String,Float> map) {
    final Float stored = map.get(word);
    return (stored != null) ? stored : 0.0f;
}
/**
 * Rounds a number to three decimal places.
 *
 * <p>Ties are rounded toward positive infinity, as done by
 * {@link Math#round(float)}. Negative values are rounded to the nearest
 * thousandth instead of being truncated toward zero. NaN, infinite values and
 * values whose thousandths do not fit in an {@code int} are returned
 * unchanged, because the integer conversion would otherwise saturate or
 * overflow and produce an unrelated result.
 *
 * @param number value to round
 * @return {@code number} rounded to the nearest 0.001
 */
private float round(float number) {
    if (Float.isNaN(number) || Float.isInfinite(number)) {
        return number;
    }
    final float scaled = number * 1000.00f;
    if (Math.abs(scaled) >= (float) Integer.MAX_VALUE) {
        // At this magnitude a float has no fractional digits left to round.
        return number;
    }
    return Math.round(scaled) / 1000.00f;
}
}
/**
 * A scored pairing of one token of string T with one token of string U.
 * Holds the position of each token and the score assigned to the pair.
 */
class Candidates implements Serializable {

    private int tPos;
    private int uPos;
    private float score;

    /**
     * Creates a candidate pair.
     *
     * @param tPos  position of the token in T
     * @param uPos  position of the token in U
     * @param score score (or distance) assigned to the pair
     */
    public Candidates(int tPos, int uPos, float score) {
        this.score = score;
        this.uPos = uPos;
        this.tPos = tPos;
    }

    /**
     * Returns the position of the token in T.
     *
     * @return the T position given at construction
     */
    public int getTPos() {
        return this.tPos;
    }

    /**
     * Returns the position of the token in U.
     *
     * @return the U position given at construction
     */
    public int getUPos() {
        return this.uPos;
    }

    /**
     * Returns the score (or distance) of the pair.
     *
     * @return the score given at construction
     */
    public float getScore() {
        return this.score;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -