📄 stopword.java
字号:
/******* StopWord.java *************************************************
Implements stop-word removal using a stop-list file (StopList class, from stoplist.java)
***************************************************************************/
package drsystem;
import java.io.*;
import java.util.*;
/**
 * Copies a dataset file to an output file, line by line, with stop words
 * removed. The filtering itself is done by {@link StoppedReader#readLine()}.
 */
public class StopWord //main class StopWord
{
    /**
     * Reads {@code E:/DRS/dataset/<fileName[0]>} through a {@link StoppedReader}
     * and writes every returned line, followed by {@code "\n"}, to
     * {@code E:/DRS/output_files/<fileName[1]>}.
     *
     * <p>{@link NullPointerException} and {@link FileNotFoundException} raised
     * while processing are reported on standard output and not rethrown.
     *
     * @param fileName two-element array: index 0 is the input file name,
     *                 index 1 is the output file name (both relative to the
     *                 fixed directories above)
     * @throws IOException if reading, writing or closing a stream fails for a
     *                     reason other than a missing file
     */
    void remStopWord(String fileName[]) throws IOException
    {
        // Stop list is loaded from a fixed location.
        StopList stopList = new StopList("E:/DRS/stoplist/english.stop");

        StoppedReader input = null;
        BufferedWriter output = null;
        try
        {
            input = new StoppedReader(new FileReader("E:/DRS/dataset/" + fileName[0]), stopList);
            output = new BufferedWriter(new FileWriter("E:/DRS/output_files/" + fileName[1]));

            String line;
            // readLine() is the overridden, stop-word-filtering version.
            while ((line = input.readLine()) != null)
            {
                output.write(line);
                output.write("\n"); // explicit LF keeps the output format platform-independent
            }
        }
        catch (NullPointerException e)
        {
            System.out.println("StopWord.java: " + e.getMessage());
        }
        catch (FileNotFoundException e)
        {
            System.out.println("StopWord.java: " + e.getMessage());
        }
        finally
        {
            // Always release both streams, even when opening the writer or a
            // read/write fails part-way; closing the writer also flushes it.
            try
            {
                if (output != null)
                {
                    output.close();
                }
            }
            finally
            {
                if (input != null)
                {
                    input.close();
                }
            }
        }
    }
}
//*******************************************************************
//StoppedReader: BufferedReader with a modified readLine() that drops stop words
//******************************************************************
/**
 * A {@link BufferedReader} whose {@link #readLine()} removes every token that
 * is contained in the supplied {@code StopList}.
 */
class StoppedReader extends BufferedReader
{
    /** Characters treated as token separators when splitting a line. */
    private static final String DELIMITERS = ",.:;/-()'[]\\\" ";

    /** Stop list consulted for every token. */
    StopList stopList;

    /**
     * @param in       underlying character source
     * @param stopList stop list used to decide which tokens are dropped
     */
    public StoppedReader(Reader in, StopList stopList)
    {
        super(in); //call to parent class constructor
        this.stopList = stopList;
    }

    /**
     * Reads the next line and strips stop words from it.
     *
     * <ul>
     *   <li>End of stream: returns {@code null}.</li>
     *   <li>Empty line: returned unchanged. It is not reported as
     *       {@code null}, because {@code null} means end of stream and would
     *       make callers stop reading at the first blank line.</li>
     *   <li>Line starting with {@code ".I"} or {@code ".W"} (document/text
     *       start markers): returned unchanged.</li>
     *   <li>Any other line: split on the delimiter characters; tokens not in
     *       the stop list are joined with single spaces.</li>
     * </ul>
     *
     * @return the filtered line (possibly empty), or {@code null} at end of stream
     * @throws IOException if the underlying reader fails
     */
    @Override
    public final String readLine() throws IOException
    {
        String line = super.readLine(); // call original readLine() method
        if (line == null)
        {
            return null; // genuine end of stream
        }
        if (line.length() == 0 || line.startsWith(".I") || line.startsWith(".W"))
        {
            return line; // nothing to filter
        }

        StringBuilder result = new StringBuilder();
        StringTokenizer tokenizer = new StringTokenizer(line, DELIMITERS);
        while (tokenizer.hasMoreTokens())
        {
            String token = tokenizer.nextToken();
            if (stopList.contains(token))
            {
                continue; // stop word: drop it
            }
            // Tokens are never empty, so a non-empty buffer means a previous
            // token was written and a separating space is needed.
            if (result.length() > 0)
            {
                result.append(' ');
            }
            result.append(token);
        }
        return result.toString();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -