📄 postinglist.java
字号:
/******* PostingList.java *********************************************
Implements the posting list as an inverted list; also calculates df,
i.e. the document frequency of each term, and counts D, i.e. the total no. of documents
***********************************************************************/
package drsystem;
import java.util.*;
import java.io.*;
/**
 * Builds an inverted index from a collection file.
 *
 * After {@link #createPList(String[])} returns, the fields hold:
 * the posting list of every term (term -> ids of the documents containing it),
 * the document frequency of every term, the number of documents read, and
 * the number of terms counted in each document.
 *
 * Input layout as recognised by the parser: a line starting with ".i" followed
 * by a number opens a new document; a line consisting of ".w" is a marker that
 * is not indexed; every other line is split into terms.
 */
class PostingList // main class PostingList
{
    // Directory the collection file is read from.
    private static final String INPUT_DIR = "E:/DRS/output_files/";
    // Characters that separate terms; none of them can ever appear inside a token.
    private static final String DELIMITERS = ",.:;/-()'[]\\\" ";

    // hashmap for dictionary posting list: term -> ids of documents containing the term
    HashMap<String,Vector<Integer>> hPosList=null;
    // hashmap for storing document freq value (df): term -> size of its posting list
    HashMap<String,Integer> hmapDF=null;
    int D=0; // count total no. of documents
    Vector<Integer> noTermsDoc=null; // no. of terms in each doc., in the order documents were read

    /**
     * Reads the file named by {@code fileIO[0]} (relative to the input directory)
     * and rebuilds {@code hPosList}, {@code hmapDF}, {@code D} and
     * {@code noTermsDoc} from scratch.
     *
     * A missing file or a document header with a non-numeric id is reported on
     * standard output and stops the indexing; whatever was indexed up to that
     * point is kept.
     *
     * @param fileIO array whose first element is the name of the file to index
     * @throws IOException if reading or closing the file fails
     */
    public void createPList(String fileIO[]) throws IOException // start of function
    {
        hPosList = new HashMap<String,Vector<Integer>>();
        hmapDF = new HashMap<String,Integer>();
        noTermsDoc = new Vector<Integer>();
        D = 0;

        BufferedReader fin = null;
        try
        {
            fin = new BufferedReader(new FileReader(INPUT_DIR + fileIO[0]));
            Integer docId = 0; // id of the document currently being read
            String line;
            while ((line = fin.readLine()) != null)
            {
                if (line.startsWith(".i")) // start of a new document
                {
                    docId = parseDocId(line);
                    noTermsDoc.addElement(0); // term counter for this document
                    D = D + 1;
                    continue;
                }
                // The marker must be recognised on the raw line: '.' is a
                // delimiter, so the tokenizer would turn ".w" into the term "w".
                if (line.trim().equals(".w"))
                    continue;
                // Text before the first document header belongs to no document.
                if (D == 0)
                    continue;

                StringTokenizer tokenizer = new StringTokenizer(line, DELIMITERS);
                while (tokenizer.hasMoreTokens())
                {
                    addTerm(tokenizer.nextToken(), docId);
                }
            }
        }
        catch (FileNotFoundException e)
        {
            System.out.println("PostingList.java: " + e.getMessage());
        }
        catch (NumberFormatException e)
        {
            System.out.println("PostingList.java: " + e.getMessage());
        }
        finally
        {
            // Release the file handle on every path, including the error paths.
            if (fin != null)
                fin.close();
        }
    }

    // Extracts the document id from a ".i" header line, tolerating surrounding whitespace.
    private static Integer parseDocId(String headerLine)
    {
        // A header without a number yields "" here and is reported as a
        // NumberFormatException, like any other malformed id.
        return Integer.valueOf(headerLine.substring(2).trim());
    }

    // Counts one term occurrence for the current document and records the document in the term's posting list.
    private void addTerm(String token, Integer docId)
    {
        noTermsDoc.set(D - 1, noTermsDoc.get(D - 1) + 1); // increment no. of terms by 1

        Vector<Integer> postings = hPosList.get(token);
        if (postings == null) // first occurrence of this term in the collection
        {
            postings = new Vector<Integer>();
            hPosList.put(token, postings);
        }
        if (!postings.contains(docId)) // a document is listed once per term
        {
            postings.addElement(docId);
            hmapDF.put(token, postings.size()); // store doc. frequency
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -