⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hebing.java

📁 采用JAVA实现两个词库的合并
💻 JAVA
字号:
import java.io.*;
public class heBing{
	/**
	 * @param args
	 */
	public static void main(String[] args) {
		// TODO 自动生成方法存根
		ciku ck = new ciku();
		ck.readword();
	}
}

/**
 * Merges the word list in {@code 11.txt} into the word list in {@code 6.txt}
 * and writes the merged list to {@code 1.txt}, one word per line.
 *
 * <p>The first dictionary is indexed by "head character" (the first character
 * of each word): consecutive lines sharing a head form one group. Every word
 * of the second dictionary whose head is already known and which is not yet
 * in that group is inserted into the group at a position chosen from the
 * spelling returned by {@code GB2Alpha.getFullSpell} (defined outside this
 * file; presumably the pinyin spelling of the word).
 *
 * <p>All group arrays are 1-based: slot 0 is never used.
 *
 * <p>Capacity is fixed by the array sizes declared below; exceeding them
 * raises {@code ArrayIndexOutOfBoundsException}.
 */
class ciku{
	String s1 = null; // line most recently read from the first dictionary
	String s2 = null; // word currently being processed from the second dictionary
	int i = 1;        // number of words stored in str (reset when loading starts)
	int i2 = 0;       // number of lines read from the second dictionary
	int j = 1;        // number of head characters found (original note: 6747 heads)
	int k = 1;        // number of words in the group currently being read
	boolean mod = false ;    // true when the head of s2 was found in f
	boolean cunzai = false;  // true when s2 already exists in its group
	String [] f = new String [6748];       // f[p] = p-th head character
	String [] str = new String [1251585];  // every word of the first dictionary, in file order
	String [][] w = new String [6849][];   // w[p][1..wordNum[p]] = words under head f[p]
	int [] wordNum = new int [6848] ;      // wordNum[p] = number of words under head f[p]
	File file1 = new File("6.txt");
	File file2 = new File("11.txt");
	String [] str2 = new String [550168];  // words of the second dictionary not found in the first
	int y = 0;                             // number of entries used in str2

	/**
	 * Returns the smaller of two integers.
	 *
	 * @param m first value
	 * @param y second value (shadows the field of the same name)
	 * @return {@code y} when {@code m > y}, otherwise {@code m}
	 */
	int min(int m,int y)
	{
		return (m > y) ? y : m;
	}

	/**
	 * Merges the current word {@code s2} into the group of head {@code x}.
	 *
	 * <p>If the word is already in the group only {@code cunzai} is set.
	 * Otherwise the word is recorded in {@code str2}, inserted into
	 * {@code w[x]}, {@code wordNum[x]} is incremented and the running count
	 * {@code y} is printed.
	 *
	 * @param x index of the head character group (1-based)
	 */
	void comWord(int x)
	{
		cunzai = false;
		for(int q = 1;q <= wordNum[x];q++)
		{
			if(s2.equals(w[x][q]))
			{
				cunzai = true;
				break;
			}
		}
		if(cunzai)
		{
			return;
		}

		y++;
		str2[y] = s2;

		// s2 ends up in slot ix + 1; slots 1..ix keep their content and the
		// remaining words move one slot to the right.
		int ix = findInsertIndex(x);
		String [] oldw = w[x];
		String [] grown = new String [oldw.length + 1];
		System.arraycopy(oldw, 1, grown, 1, ix);
		grown[ix + 1] = s2;
		System.arraycopy(oldw, ix + 1, grown, ix + 2, oldw.length - 1 - ix);
		w[x] = grown;
		wordNum[x]++;

		System.out.println(y) ;
	}

	/**
	 * Chooses how many existing words of group {@code x} stay in front of
	 * the new word {@code s2}.
	 *
	 * <p>First pass: among words sharing the second character of
	 * {@code s2}, stop in front of the first one whose full spelling is
	 * greater than that of {@code s2}; if none is greater, go behind the
	 * last word of the run that was examined last.
	 * Second pass (only when no word shares the second character): compare
	 * the spelling of everything after the head character, considering only
	 * words whose spelling starts with a character that encodes to a single
	 * byte in the platform charset.
	 *
	 * @param x index of the head character group (1-based)
	 * @return number of leading words kept before the new word (0 = front)
	 */
	private int findInsertIndex(int x)
	{
		String [] words = w[x];
		int n = wordNum[x];

		// A one-character word has no second character to compare; put it
		// in front of the longer words of its group instead of failing in
		// substring(1, 2).
		if(s2.length() < 2)
		{
			return 0;
		}

		String second = s2.substring(1, 2);
		String fullSpell = null; // spelling of s2, computed on first use
		boolean same = false;
		int ix = 0;

		for(int q = 1;q <= n;q++)
		{
			if(!second.equals(secondChar(words[q])))
			{
				continue;
			}
			same = true;
			if(fullSpell == null)
			{
				fullSpell = GB2Alpha.getFullSpell(s2);
			}
			String pyw = GB2Alpha.getFullSpell(words[q]);
			if(fullSpell.compareTo(pyw) < 0)
			{
				// NOTE(review): for q == 1 the new word is placed behind the
				// first word although it compares smaller; kept as in the
				// original because the intent is unclear - confirm.
				ix = (q == 1) ? 1 : q - 1;
				break;
			}
			// s2 is not smaller: remember the end of this run of words
			// sharing the second character.
			if(q == n || !second.equals(secondChar(words[q + 1])))
			{
				ix = q;
			}
		}

		if(!same)
		{
			String tailSpell = GB2Alpha.getFullSpell(s2.substring(1));
			for(int q = 1;q <= n;q++)
			{
				if(words[q].length() < 2)
				{
					continue; // nothing after the head character to compare
				}
				String pyw = GB2Alpha.getFullSpell(words[q].substring(1));
				if(pyw.length() == 0)
				{
					continue;
				}
				byte[] bytes = pyw.substring(0, 1).getBytes();
				if(bytes.length != 1)
				{
					continue; // spelling does not start with a single-byte character
				}
				if(tailSpell.compareTo(pyw) < 0)
				{
					// NOTE(review): same q == 1 placement as in the first pass.
					ix = (q == 1) ? 1 : q - 1;
					break;
				}
				if(q == n)
				{
					ix = q;
					break;
				}
			}
		}
		return ix;
	}

	/**
	 * Returns the second character of {@code word} as a string, or the
	 * empty string when the word is shorter than two characters.
	 */
	private static String secondChar(String word)
	{
		return (word.length() >= 2) ? word.substring(1, 2) : "";
	}

	/**
	 * Reads the second dictionary line by line and merges every word.
	 *
	 * <p>Lines are trimmed (as the first dictionary is) and blank lines are
	 * skipped. A word whose head character is found in {@code f} is handed
	 * to {@link #comWord(int)}. A word whose head character is unknown is
	 * only recorded in {@code str2} and printed; it is never added to
	 * {@code w}, so it does not appear in the merged output.
	 *
	 * <p>An {@code IOException} is printed as {@code "error:" + exception}
	 * and ends the merge early; the reader is closed in every case.
	 */
	void comFirWord()
	{
		try{
			BufferedReader in2 = new BufferedReader(new FileReader(file2));
			try
			{
				while((s2 = in2.readLine()) != null)
				{
					i2++;
					s2 = s2.trim();
					if(s2.length() == 0)
					{
						continue; // blank line: no head character to look up
					}
					mod = false;
					String head = s2.substring(0, 1);
					for(int p = 1;p <= j;p++)
					{
						if(head.equals(f[p]))
						{
							mod = true;
							comWord(p);
							break;
						}
					}
					if(!mod)
					{
						y++;
						str2[y] = s2;
						System.out.println(str2[y]) ;
					}
				}
			}
			finally
			{
				in2.close();
			}
		}
		catch(IOException ee)
		{
			System.out .println("error:" + ee) ;
		}
	}

	/**
	 * Runs the whole merge: loads the first dictionary, merges the second
	 * one into it and writes the result to {@code 1.txt}.
	 *
	 * <p>An {@code IOException} while loading or writing is printed as
	 * {@code "error is :" + exception}; when loading fails nothing is
	 * merged or written.
	 */
	void readword()
	{
		try{
			loadFirstDictionary();
			comFirWord();
			writeMerged();
		}
		catch(IOException e)
		{
			System.out .println("error is :" + e) ;
		}
	}

	/**
	 * Reads the first dictionary into {@code str} and builds the head index
	 * ({@code f}, {@code w}, {@code wordNum}). Every line is trimmed, blank
	 * lines are skipped, and a new group starts whenever the first character
	 * differs from the head of the current group. An empty file leaves the
	 * index empty ({@code j == 0}).
	 *
	 * @throws IOException if the file cannot be opened or read
	 */
	private void loadFirstDictionary() throws IOException
	{
		BufferedReader in1 = new BufferedReader(new FileReader(file1));
		try
		{
			i = 0;
			j = 0;
			k = 0;
			while((s1 = in1.readLine()) != null)
			{
				String word = s1.trim();
				if(word.length() == 0)
				{
					continue;
				}
				String head = word.substring(0, 1);
				if(j == 0 || !head.equals(f[j]))
				{
					if(j > 0)
					{
						storeGroup(); // previous group is complete
					}
					j++;
					k = 0;
					f[j] = head;
				}
				i++;
				k++;
				str[i] = word;
			}
			if(j > 0)
			{
				storeGroup(); // last group of the file
			}
		}
		finally
		{
			in1.close();
		}
	}

	/**
	 * Copies the last {@code k} words read ({@code str[i-k+1 .. i]}) into
	 * {@code w[j][1 .. k]} and records the group size in {@code wordNum[j]}.
	 */
	private void storeGroup()
	{
		wordNum[j] = k;
		w[j] = new String [k + 1];
		System.arraycopy(str, i - k + 1, w[j], 1, k);
	}

	/**
	 * Writes every group {@code w[1 .. j]} to {@code 1.txt}, one word per
	 * line. Only the {@code j} groups that exist are visited, so unused
	 * slots of {@code w} are never dereferenced, and the writer is closed
	 * (and therefore flushed) even when writing fails.
	 *
	 * @throws IOException if the file cannot be created or written
	 */
	private void writeMerged() throws IOException
	{
		File file = new File("1.txt");
		BufferedWriter out = new BufferedWriter(new FileWriter(file));
		try
		{
			for(int fir = 1;fir <= j;fir++)
			{
				String [] words = w[fir];
				if(words == null)
				{
					continue;
				}
				for(int word = 1;word < words.length;word++)
				{
					out.write(words[word], 0, words[word].length());
					out.newLine();
				}
			}
		}
		finally
		{
			out.close();
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -