⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 addcollocation.java

📁 褒贬评价
💻 JAVA
字号:
import java.util.*;
import java.io.*;

/**
 * Marks collocations in a word-segmented corpus.
 *
 * <p>{@link #main} reads {@code 1addpolar.txt} line by line, looks up every entry of
 * {@code collocation_seg.txt} in each line, inserts a {@code #^<letter>} mark in front of both
 * parts of each collocation it finds, and writes the resulting lines to {@code 2addcollo.txt}.
 * {@link #addFactor} additionally appends the level letters of any "influence factor" words
 * (listed in {@code factor_seg.txt}) that sit next to a marked word.
 *
 * <p>All files are read and written with the platform default charset, exactly as before.
 * Non-ASCII string constants are written as Unicode escapes so the compiled values do not
 * depend on the encoding passed to the compiler.
 */
public class AddCollocation
{
    /** Regex used to split a lexicon line into tokens. */
    private static final String TOKEN_DELIMITER = "\\s+";

    /** Two spaces: the separator between segmented words in the corpus. */
    private static final String WORD_SEPARATOR = "  ";

    /** Prefix of the mark inserted in front of a collocation part; a one-char letter follows it. */
    private static final String COLLOCATION_MARK = "#^";

    /**
     * Clause-ending punctuation: full-width comma (U+FF0C), ideographic full stop (U+3002),
     * ASCII question mark, full-width exclamation mark (U+FF01), full-width semicolon (U+FF1B).
     * NOTE(review): the ASCII question mark (here and as the marker in collocation entries) may
     * be a mis-encoded character from the original source - confirm against the data files.
     */
    private static final String[] CLAUSE_DELIMITERS = {"\uFF0C", "\u3002", "?", "\uFF01", "\uFF1B"};

    /** The structural particles U+7684, U+5730 and U+5F97 (all read "de"), each tagged "/u". */
    private static final String[] PARTICLES = {"\u7684/u", "\u5730/u", "\u5F97/u"};

    /** One parsed line of the influence-factor lexicon. */
    private static final class Factor
    {
        /** Word sequence to look for, tokens joined by {@link #WORD_SEPARATOR}. */
        final String pattern;
        /** Level letter of the factor (a, b, c, d, n or 0 in well-formed data). */
        final char level;
        /** True when the factor is expected after the marked word rather than before it. */
        final boolean follows;

        Factor(String pattern, char level, boolean follows)
        {
            this.pattern = pattern;
            this.level = level;
            this.follows = follows;
        }
    }

    /**
     * Runs the marking pass over {@code 1addpolar.txt} and writes {@code 2addcollo.txt}.
     *
     * @param args ignored
     * @throws IOException if an input file cannot be opened or the output cannot be written
     */
    public static void main(String[] args) throws IOException
    {
        File corpusFile = new File("1addpolar.txt");
        File collocationFile = new File("collocation_seg.txt");
        // The writer is opened first, as before, so the output file is created even when the
        // corpus is missing. Both resources are now closed on every exit path.
        try (FileWriter out = new FileWriter("2addcollo.txt");
             Scanner corpus = new Scanner(corpusFile)) {
            List<String> collocations = null;
            while (corpus.hasNextLine()) {
                StringBuffer line = new StringBuffer(corpus.nextLine());
                if (collocations == null) {
                    // Read the lexicon once instead of reopening it for every corpus line.
                    collocations = readLines(collocationFile);
                }
                for (String entry : collocations) {
                    markCollocation(line, entry);
                }
                out.write(line.toString() + "\n");
            }
        }
    }

    /**
     * Marks every occurrence of one collocation entry inside {@code text}.
     *
     * <p>The entry is split on whitespace. Tokens up to (not including) the first token that
     * starts with '?' form the first part; the second part is parsed from the text starting
     * five characters after the first '?' and ends at the first token that starts with one of
     * the polarity characters {@code + - * ~}. The first character of the last token examined
     * becomes the letter written after {@code #^}.
     *
     * @param text  corpus line, modified in place
     * @param entry one line of the collocation lexicon
     * @throws IOException if the factor lexicon cannot be read
     */
    private static void markCollocation(StringBuffer text, String entry) throws IOException
    {
        List<String> tokens = tokenize(entry);
        if (tokens.isEmpty()) {
            return; // blank lexicon line: nothing to look for
        }
        char letter = 'E';
        String firstPart = tokens.get(0);
        for (int i = 1; i < tokens.size(); i++) {
            String token = tokens.get(i);
            letter = token.charAt(0);
            if (letter == '?') {
                break;
            }
            firstPart = firstPart + WORD_SEPARATOR + token;
        }

        int idx = text.indexOf(firstPart);
        if (idx == -1) {
            return;
        }

        // The second part depends only on the entry, so it is parsed once per entry, and only
        // after the first part has been found (malformed tails of unmatched entries are ignored).
        // The offset 5 presumably skips a 3-character marker token plus the two-space separator
        // - TODO confirm against the lexicon format.
        int secondStart = entry.indexOf("?") + 5;
        List<String> tail = secondStart <= entry.length()
                ? tokenize(entry.substring(secondStart))
                : new ArrayList<String>();
        if (tail.isEmpty()) {
            System.err.println("Skipping malformed collocation entry: " + entry);
            return;
        }
        String secondPart = tail.get(0);
        for (int i = 1; i < tail.size(); i++) {
            String token = tail.get(i);
            letter = token.charAt(0);
            if (isPolarity(letter)) {
                break;
            }
            secondPart = secondPart + WORD_SEPARATOR + token;
        }
        int firstLength = firstPart.length();
        int secondLength = secondPart.length();

        while (idx != -1) {
            // Only look for the second part inside the clause that contains the first part.
            String clause = text.substring(idx, clauseEnd(text, idx));
            int inde = clause.indexOf(secondPart);
            if (inde != -1) {
                // Drop the '#' of a tag that already precedes either part (the tag letters stay).
                int hash = hashBefore(text, idx);
                if (hash != -1) {
                    text.deleteCharAt(hash);
                    idx--;
                }
                hash = hashBefore(text, idx + inde);
                if (hash != -1) {
                    text.deleteCharAt(hash);
                    inde--;
                }
                text.insert(idx + inde, COLLOCATION_MARK + letter);
                int separator = text.substring(0, idx).lastIndexOf(WORD_SEPARATOR);
                addFactor(text, separator, firstLength);
                separator = text.substring(0, idx + inde).lastIndexOf(WORD_SEPARATOR);
                addFactor(text, separator, secondLength);
                text.insert(idx, COLLOCATION_MARK + letter);

                System.out.println(secondPart);
            }
            idx = text.indexOf(firstPart, idx + 5);
        }
    }

    /**
     * Appends influence-factor level letters to the first {@code #^x} mark found in {@code str1}.
     *
     * <p>The first pass checks, for every entry of {@code factor_seg.txt}, whether it sits
     * directly before the separator at {@code index}, optionally with one of the particles in
     * {@link #PARTICLES} in between; entries flagged as following the word are checked after it
     * and end the pass on their first match. Whenever a preceding factor was found, its level
     * letter (the one of the longest matching entry) is inserted three characters after the
     * start of the mark and the search repeats further to the left, until a pass finds nothing.
     *
     * <p>NOTE(review): the mark is located with {@code indexOf("#^")}, i.e. it is always the
     * first mark on the line - confirm that this is intended when a line holds several marks.
     *
     * @param str1  corpus line, modified in place
     * @param index position of the two-space separator in front of the marked word, or -1
     * @param slen  length of the marked word, used to locate a following factor
     * @throws IOException if {@code factor_seg.txt} cannot be opened
     */
    public static void addFactor(StringBuffer str1, int index, int slen) throws IOException
    {
        // Read and parse the lexicon once per call; the scanner is closed before matching starts.
        List<Factor> factors = loadFactors(new File("factor_seg.txt"));
        int insertAt = str1.indexOf(COLLOCATION_MARK) + 3; // just after "#^" plus its letter
        int consumed = 0;      // characters to the left of index already claimed by factors
        char bestLevel = 'E';
        boolean firstPass = true;
        boolean found;
        do {
            int directHits = 0;
            int particleHits = 0;
            int bestLength = 0;
            for (Factor factor : factors) {
                String pattern = factor.pattern;
                int length = pattern.length();
                if (factor.follows) {
                    // Factors after the word are only looked for once, in the first pass.
                    if (firstPass && followsWord(str1, index + 2 + slen, pattern)) {
                        str1.insert(insertAt, factor.level);
                        break;
                    }
                    continue;
                }
                int end = index - consumed;
                boolean hit = false;
                if (slice(str1, end - length, end).equals(pattern)) {
                    directHits++;
                    hit = true;
                } else if (endsWithParticle(slice(str1, end - 5 - length, end), pattern)) {
                    // 5 = particle token (3 chars) plus the two-space separator
                    particleHits++;
                    hit = true;
                }
                if (hit && length > bestLength) {
                    bestLength = length;
                    bestLevel = factor.level;
                }
            }
            found = directHits > 0 || particleHits > 0;
            if (directHits > 0) {
                str1.insert(insertAt, bestLevel);
                consumed += 2 + bestLength;   // separator + factor
            } else if (particleHits > 0) {
                str1.insert(insertAt, bestLevel);
                consumed += 7 + bestLength;   // separator + particle + separator + factor
            }
            firstPass = false;
        } while (found);
    }

    /** Reads all lines of {@code file} with the platform default charset. */
    private static List<String> readLines(File file) throws IOException
    {
        List<String> lines = new ArrayList<String>();
        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNextLine()) {
                lines.add(scanner.nextLine());
            }
        }
        return lines;
    }

    /** Splits {@code line} into its whitespace-separated tokens. */
    private static List<String> tokenize(String line)
    {
        List<String> tokens = new ArrayList<String>();
        try (Scanner scanner = new Scanner(line)) {
            scanner.useDelimiter(TOKEN_DELIMITER);
            while (scanner.hasNext()) {
                tokens.add(scanner.next());
            }
        }
        return tokens;
    }

    /** Loads the factor lexicon, skipping blank and malformed lines. */
    private static List<Factor> loadFactors(File file) throws IOException
    {
        List<Factor> factors = new ArrayList<Factor>();
        for (String line : readLines(file)) {
            Factor factor = parseFactor(line);
            if (factor != null) {
                factors.add(factor);
            }
        }
        return factors;
    }

    /**
     * Parses one factor lexicon line.
     *
     * <p>Tokens are joined with two spaces until a token starting with a, b, c, d, n, 0 or 'A'
     * is met; that token supplies the level (its second character when it starts with 'A').
     * When the joined pattern itself starts with 'A', its first five characters are stripped
     * and the factor is treated as one that follows the word.
     *
     * @return the parsed factor, or {@code null} for a blank or too-short line
     */
    private static Factor parseFactor(String line)
    {
        List<String> tokens = tokenize(line);
        if (tokens.isEmpty()) {
            return null;
        }
        String pattern = tokens.get(0);
        char level = 'E';
        for (int i = 1; i < tokens.size(); i++) {
            String token = tokens.get(i);
            level = token.charAt(0);
            if (level == 'A') {
                if (token.length() < 2) {
                    return null; // 'A' without a level letter
                }
                level = token.charAt(1);
                break;
            }
            if (isDegree(level)) {
                break;
            }
            pattern = pattern + WORD_SEPARATOR + token;
        }
        boolean follows = pattern.charAt(0) == 'A';
        if (follows) {
            if (pattern.length() < 5) {
                return null; // prefix shorter than the five characters that get stripped
            }
            pattern = pattern.substring(5);
        }
        return new Factor(pattern, level, follows);
    }

    /** Returns true when {@code pattern}, optionally preceded by a particle, starts at {@code start}. */
    private static boolean followsWord(StringBuffer text, int start, String pattern)
    {
        int length = pattern.length();
        if (slice(text, start, start + length).equals(pattern)) {
            return true;
        }
        String candidate = slice(text, start, start + 5 + length);
        for (String particle : PARTICLES) {
            if (candidate.equals(particle + WORD_SEPARATOR + pattern)) {
                return true;
            }
        }
        return false;
    }

    /** Returns true when {@code candidate} is {@code pattern}, a separator and then a particle. */
    private static boolean endsWithParticle(String candidate, String pattern)
    {
        for (String particle : PARTICLES) {
            if (candidate.equals(pattern + WORD_SEPARATOR + particle)) {
                return true;
            }
        }
        return false;
    }

    /** Returns {@code text[start, end)}, or the empty string when the range is not inside the text. */
    private static String slice(StringBuffer text, int start, int end)
    {
        if (start < 0 || end > text.length() || start > end) {
            return "";
        }
        return text.substring(start, end);
    }

    /**
     * Returns the index at which the clause containing {@code from} ends: the position of the
     * nearest delimiter at or after {@code from}, or the text length when there is none (a
     * delimiter found at position 0 is also treated as "none", as before).
     */
    private static int clauseEnd(StringBuffer text, int from)
    {
        int nearest = -1;
        for (String delimiter : CLAUSE_DELIMITERS) {
            int pos = text.indexOf(delimiter, from);
            if (pos != -1 && (nearest == -1 || pos < nearest)) {
                nearest = pos;
            }
        }
        return nearest > 0 ? nearest : text.length();
    }

    /**
     * Walks left from {@code pos} over polarity and level characters and returns the index of
     * the '#' that precedes them, or -1 when there is none (including when the start of the
     * text is reached).
     */
    private static int hashBefore(StringBuffer text, int pos)
    {
        int i = pos - 1;
        while (i >= 0 && (isPolarity(text.charAt(i)) || isDegree(text.charAt(i)))) {
            i--;
        }
        return (i >= 0 && text.charAt(i) == '#') ? i : -1;
    }

    /** Polarity letters of a sentiment word. */
    private static boolean isPolarity(char ch)
    {
        return ch == '+' || ch == '-' || ch == '*' || ch == '~';
    }

    /** Level letters of an influence factor. */
    private static boolean isDegree(char ch)
    {
        return ch == 'a' || ch == 'b' || ch == 'c' || ch == 'd' || ch == 'n' || ch == '0';
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -