📄 stemrevised.java
字号:
package searchingEngine.queryPrepocessing;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import searchingEngine.queryPrepocessing.MartinPorter.Stemmer;
/**
 * Stems every word of a text and marks the end of each stem by upper-casing
 * its last character.
 *
 * <p>A "word" is a maximal run of characters for which
 * {@link Character#isLetter(char)} is true. Each word is lower-cased, fed to the
 * inherited {@code add(char)} / {@code stem()} / {@code toString()} methods of
 * {@code Stemmer}, and the last character of the resulting string is
 * upper-cased. All non-letter characters are copied to the output unchanged.
 *
 * <p>NOTE(review): {@code Stemmer} is declared in another file; judging by its
 * package name it is presumably Martin Porter's reference stemmer, but that
 * should be confirmed. This class relies on {@code toString()} returning the
 * stem of the characters added since the previous {@code stem()} call.
 */
public class StemRevised extends Stemmer {

    /** Maximum number of letters kept per word; any further letters of the word are dropped. */
    private static final int MAX = 1024;

    /**
     * Returns a copy of {@code input} in which every word has been replaced by
     * its marked stem (see the class description). Each non-letter character is
     * copied through exactly once, so the result has the same delimiters as the
     * input and matches what {@link #stemFile(String, String)} would write.
     *
     * @param input the text to process; may be {@code null} or empty
     * @return the processed text, or an empty string for {@code null}/empty input
     */
    public String fullStem(String input) {
        if (input == null || input.isEmpty()) {
            return "";
        }
        StringBuilder out = new StringBuilder(input.length());
        char[] word = new char[MAX];
        int len = 0;
        for (int i = 0; i < input.length(); i++) {
            char ch = input.charAt(i);
            if (Character.isLetter(ch)) {
                if (len < MAX) {
                    word[len++] = Character.toLowerCase(ch);
                }
                continue;
            }
            // A non-letter ends the pending word (if any) and is then copied once.
            if (len > 0) {
                out.append(stemAndMarkWord(word, len));
                len = 0;
            }
            out.append(ch);
        }
        // The input may end in the middle of a word; flush it so it is not lost.
        if (len > 0) {
            out.append(stemAndMarkWord(word, len));
        }
        return out.toString();
    }

    /**
     * Reads {@code inputFile} one byte at a time, applies the same word
     * processing as {@link #fullStem(String)}, and writes the result to
     * {@code outputFile}.
     *
     * <p>Each byte is cast directly to a {@code char}, so the input is
     * effectively interpreted as one character per byte; output is written
     * through a {@link PrintStream} created without an explicit charset.
     * NOTE(review): multi-byte encodings such as UTF-8 will not round-trip for
     * non-ASCII text — confirm the expected input encoding.
     *
     * @param inputFile  path of the file to read
     * @param outputFile path of the file to create or overwrite
     * @throws IOException if either file cannot be opened, reading fails, or
     *                     the output stream reports a write error
     */
    public void stemFile(String inputFile, String outputFile) throws IOException {
        char[] word = new char[MAX];
        int len = 0;
        // The input is opened first so a missing input file does not truncate
        // an existing output file. Both streams are closed on every exit path.
        try (FileInputStream in = new FileInputStream(inputFile);
             PrintStream out = new PrintStream(new FileOutputStream(outputFile))) {
            int ch;
            while ((ch = in.read()) >= 0) {
                if (Character.isLetter((char) ch)) {
                    if (len < MAX) {
                        word[len++] = Character.toLowerCase((char) ch);
                    }
                    continue;
                }
                if (len > 0) {
                    out.print(stemAndMarkWord(word, len));
                    len = 0;
                }
                out.print((char) ch);
            }
            // End of file reached while still inside a word.
            if (len > 0) {
                out.print(stemAndMarkWord(word, len));
            }
            // PrintStream never throws on write failure; surface it explicitly.
            if (out.checkError()) {
                throw new IOException("Error while writing to " + outputFile);
            }
        }
    }

    /**
     * Feeds the first {@code len} characters of {@code word} to the inherited
     * stemmer and returns the result with its last character upper-cased.
     *
     * @param word buffer holding the lower-cased letters of one word
     * @param len  number of valid characters in {@code word}
     * @return the marked stem, or the stemmer's result unchanged if it is empty
     */
    private String stemAndMarkWord(char[] word, int len) {
        for (int i = 0; i < len; i++) {
            add(word[i]);
        }
        stem();
        String stemmed = toString();
        if (stemmed.isEmpty()) {
            return stemmed;
        }
        int last = stemmed.length() - 1;
        return stemmed.substring(0, last) + stemmed.substring(last).toUpperCase();
    }

    /**
     * Demo entry point: prints a sample query, then stems the file
     * {@code haha.txt} into {@code out.txt} in the working directory.
     *
     * @param arg command-line arguments (unused)
     */
    public static void main(String[] arg) {
        String query = "601 and or not Turkey Iraq water What is the effect of Turkish river control projects on Iraqi water resources? A relevant document will deal specifically with water issues between Turkey and Iraq. Other political and economic concerns between the two countries (e.g. the Kurds or water to Syria, Israel and Lebanon) are not relevant.";
        System.out.println(query);
        StemRevised stem = new StemRevised();
        try {
            stem.stemFile("haha.txt", "out.txt");
        } catch (Exception e) {
            // Still best-effort (the demo does not abort), but no longer silent.
            System.err.println("Stemming haha.txt failed: " + e);
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -