// File: addzero.java
// (The original "font size" label here was code-viewer page residue, not part of the program.)
import java.util.*;
import java.io.*;
/**
 * Removes two-character zero markers ({@code #+}, {@code #-}, {@code #*},
 * {@code #~}, {@code #^}) from every sentence of the corpus that contains a
 * collocation listed in the collocation table.
 *
 * <p>Input corpus: {@code 2addcollo.txt}; collocation table: {@code zero_seg.txt}
 * (one collocation per line); output: {@code 3addzero.txt}. All files are read
 * and written with the platform default charset.
 */
public class AddZero
{
/** Characters that delimit sentences: ideographic full stop (U+3002), '?' and '!'. */
private static final String SENTENCE_TERMINATORS = "\u3002?!";

/** Characters that, directly after '#', form a marker to be removed. */
private static final String MARKER_SUFFIXES = "+-*~^";

/**
 * Reads the collocation table once, then rewrites the corpus line by line.
 *
 * @param args unused
 * @throws IOException if an input file cannot be opened or the output cannot be written
 */
public static void main(String[] args) throws IOException
{
    // Load the table a single time instead of re-reading it for every corpus line.
    List<String> collocations = readLines(new File("zero_seg.txt"));

    // try-with-resources closes both files even when processing fails midway.
    try (Scanner corpus = new Scanner(new File("2addcollo.txt"));
         FileWriter out = new FileWriter("3addzero.txt"))
    {
        while (corpus.hasNextLine())
        {
            out.write(removeZeroMarkers(corpus.nextLine(), collocations) + "\n");
        }
    }
}

/**
 * Returns {@code line} with the markers removed from each sentence that
 * contains at least one of the given collocations.
 *
 * <p>A sentence runs from the last terminator before the collocation
 * (inclusive, or the line start) to the first terminator at or after the
 * collocation's first character (exclusive, or the line end).
 *
 * @param line         one corpus line
 * @param collocations collocation strings to look for; empty strings are ignored
 * @return the cleaned line
 */
public static String removeZeroMarkers(String line, List<String> collocations)
{
    StringBuilder text = new StringBuilder(line);
    for (String collocation : collocations)
    {
        if (collocation.isEmpty())
        {
            // An empty pattern matches at every position (including the end),
            // so searching for it would never terminate.
            continue;
        }
        int idx = text.indexOf(collocation);
        while (idx != -1)
        {
            int start = sentenceStart(text, idx);
            int end = sentenceEnd(text, idx);
            // Deletions in front of the match shift it to the left; follow it so
            // that the next search does not jump over later occurrences.
            idx -= stripMarkers(text, start, end, idx);
            idx = text.indexOf(collocation, idx + 1);
        }
    }
    return text.toString();
}

/** Reads every line of {@code file} into a list, in file order. */
private static List<String> readLines(File file) throws IOException
{
    List<String> lines = new ArrayList<>();
    try (Scanner in = new Scanner(file))
    {
        while (in.hasNextLine())
        {
            lines.add(in.nextLine());
        }
    }
    return lines;
}

/** Index of the last terminator strictly before {@code idx}, or 0 if there is none. */
private static int sentenceStart(CharSequence text, int idx)
{
    for (int i = idx - 1; i >= 0; i--)
    {
        if (SENTENCE_TERMINATORS.indexOf(text.charAt(i)) >= 0)
        {
            return i;
        }
    }
    return 0;
}

/** Index of the first terminator at or after {@code idx}, or the text length if there is none. */
private static int sentenceEnd(CharSequence text, int idx)
{
    for (int i = idx; i < text.length(); i++)
    {
        if (SENTENCE_TERMINATORS.indexOf(text.charAt(i)) >= 0)
        {
            return i;
        }
    }
    return text.length();
}

/**
 * Deletes every marker that lies completely inside {@code [start, end)}.
 *
 * @param anchor position of the current collocation match
 * @return number of characters deleted in front of {@code anchor}
 */
private static int stripMarkers(StringBuilder text, int start, int end, int anchor)
{
    int removedBeforeAnchor = 0;
    int i = start;
    // "i + 1 < end": a '#' that is the last character of the sentence has no
    // suffix character inside the sentence and therefore is not a marker.
    while (i + 1 < end)
    {
        if (text.charAt(i) == '#' && MARKER_SUFFIXES.indexOf(text.charAt(i + 1)) >= 0)
        {
            if (i < anchor)
            {
                int shift = Math.min(2, anchor - i);
                removedBeforeAnchor += shift;
                anchor -= shift;
            }
            text.delete(i, i + 2);
            end -= 2;
            // Do not advance: position i now holds the character that followed the marker.
        }
        else
        {
            i++;
        }
    }
    return removedBeforeAnchor;
}
}
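// Code-viewer keyboard shortcuts (web page residue, not part of the program):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -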