⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 myfenci.java

📁 采用JAVA实现两个词库的合并
💻 JAVA
字号:
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.util.*;

/**
 * In-memory dictionary used by the word segmenter.
 *
 * Constructing an instance reads the dictionary file (one word per line) and
 * fills the static tables below. Words are grouped by their first character;
 * every run of consecutive lines sharing a first character becomes one group.
 * All tables are 1-based: index 0 is never used.
 */
class ciku
{
	// Dictionary file, resolved against the working directory.
	// NOTE(review): FileReader decodes with the JVM default charset - confirm
	// that it matches the encoding the dictionary file was saved in.
	File file1 = new File("紫光输入法词库(548387词).txt");
	String s1 = null; // raw line most recently read from the dictionary
	int i = 0;  // number of words stored in str so far
	int j = 0;  // number of first-character groups created so far
	int k = 0;  // number of words in the group currently being read
	static boolean mod = false;    // set by compword: a group with the word's first character exists
	static boolean cunzai = false; // set by compword: the word itself was found
	static String [] f = new String [6748];      // first character of each group
	static String [] str = new String [1251585]; // every word, in file order
	static String [][] w = new String [6748][];  // words of each group, stored at indices 1..wordNum
	static int [] wordNum = new int [6748];      // number of words in each group
	int y = 0;

	/**
	 * Loads the dictionary into the static tables.
	 *
	 * Lines are trimmed and blank lines are skipped. An empty or missing file
	 * leaves the tables untouched. An IOException is printed to standard
	 * output and loading stops; groups completed before the failure remain
	 * available.
	 */
	ciku()
	{
		// try-with-resources closes the reader even when reading fails part-way.
		try (BufferedReader in1 = new BufferedReader(new FileReader(file1)))
		{
			while ((s1 = in1.readLine()) != null)
			{
				String word = s1.trim();
				if (word.isEmpty())
				{
					continue; // a blank line carries no word
				}
				String first = word.substring(0, 1);
				if (j == 0 || !first.equals(f[j]))
				{
					// The first character changed: close the previous group
					// (if any) and open a new one.
					if (j > 0)
					{
						storeGroup();
					}
					j++;
					k = 0;
					ensureGroupCapacity(j);
					f[j] = first;
				}
				i++;
				k++;
				ensureWordCapacity(i);
				str[i] = word;
			}
			if (j > 0)
			{
				storeGroup(); // the last group is closed by end of file
			}
		}
		catch (IOException ee)
		{
			System.out.println(ee);
		}
	}

	/** Copies the k most recently read words into w[j] (indices 1..k) and records the count. */
	private void storeGroup()
	{
		wordNum[j] = k;
		w[j] = new String [k + 1];
		System.arraycopy(str, i - k + 1, w[j], 1, k);
	}

	/** Grows the per-group tables so that {@code index} is a valid position in each of them. */
	private static void ensureGroupCapacity(int index)
	{
		if (index >= f.length)
		{
			int size = Math.max(index + 1, f.length * 2);
			f = Arrays.copyOf(f, size);
			w = Arrays.copyOf(w, size);
			wordNum = Arrays.copyOf(wordNum, size);
		}
	}

	/** Grows the word table so that {@code index} is a valid position in it. */
	private static void ensureWordCapacity(int index)
	{
		if (index >= str.length)
		{
			str = Arrays.copyOf(str, Math.max(index + 1, str.length * 2));
		}
	}

	/**
	 * Looks up a word in the loaded dictionary.
	 *
	 * Every group whose first character equals the word's first character is
	 * searched, so the word is found even when the dictionary file lists the
	 * same first character in several separate runs.
	 *
	 * @param strCopy the candidate word; null or empty is never a word
	 * @return true if strCopy is in the dictionary, false otherwise
	 */
	public static boolean compword(String strCopy)
	{
		mod = false;
		cunzai = false;
		if (strCopy == null || strCopy.isEmpty())
		{
			return false;
		}
		String first = strCopy.substring(0, 1);
		for (int p = 1; p < f.length; p++) // scan the groups by first character
		{
			if (!first.equals(f[p]))
			{
				continue;
			}
			mod = true;
			for (int q = 1; q <= wordNum[p]; q++) // scan the words of this group
			{
				if (strCopy.equals(w[p][q]))
				{
					cunzai = true;
					return true;
				}
			}
		}
		return false;
	}
}

/**
 * Main window of the segmentation demo.
 *
 * The upper text area takes the input text, the two buttons run forward or
 * reverse maximum matching against the dictionary held by {@code ciku}, and
 * the lower (read-only) text area shows the resulting words separated by
 * single spaces. Each word is also printed on its own line to standard output.
 */
class Window extends Frame implements ActionListener
{
	MenuBar menubar;
	Menu menu1,menu2;
	MenuItem item1,item2;
	TextArea text1,text2;
	Button but1,but2;

	/**
	 * Builds the window, wires the two buttons to this listener and shows it.
	 *
	 * @param s the window title
	 */
	Window(String s)
	{
		super(s);
		setLayout(new FlowLayout());
		menubar = new MenuBar();
		menu1 = new Menu("文件");
		menu2 = new Menu("编辑");
		item1 = new MenuItem("打开");
		item2 = new MenuItem("保存");
		text1 = new TextArea(12,70);
		text2 = new TextArea(12,70);
		but1 = new Button(" 正向最大匹配算法 ");
		but2 = new Button(" 逆向最大匹配算法 ");
		add(text1);
		add(but1);
		add(but2);
		add(text2);
		menu1.add(item1);
		menu1.add(item2);
		menubar.add(menu1);
		menubar.add(menu2);
		setMenuBar(menubar);
		but1.addActionListener(this);
		but2.addActionListener(this);
		addWindowListener(new WindowAdapter()
							{
								public void windowClosing(WindowEvent e){
									System.exit(0);
								}
							});
		text2.setEditable(false);
		setBounds(100,100,530,480);
		setVisible(true);
		validate();
	}

	/**
	 * Tells whether a candidate string is a dictionary word.
	 *
	 * @param strCopy the candidate; null or empty is never a word
	 * @return the result of {@code ciku.compword} for a non-empty candidate
	 */
	boolean find(String strCopy)
	{
		// A candidate made only of whitespace trims down to "", which the
		// dictionary lookup must never be asked about.
		if (strCopy == null || strCopy.length() == 0)
		{
			return false;
		}
		return ciku.compword(strCopy);
	}

	int indexRes = 0;                 // number of segments currently buffered in tt
	String tt []  = new String [500]; // segment buffer; grown on demand by addSegment

	/** Buffers one segment, doubling the buffer when it is full. */
	private void addSegment(String segment)
	{
		if (indexRes == tt.length)
		{
			tt = Arrays.copyOf(tt, tt.length * 2);
		}
		tt[indexRes] = segment;
		indexRes ++;
	}

	/**
	 * Writes the buffered segments to the output area and to standard output,
	 * then empties the buffer.
	 *
	 * @param reversed true to emit the segments last-buffered first
	 */
	private void flushSegments(boolean reversed)
	{
		StringBuilder line = new StringBuilder();
		for (int idx = 0; idx < indexRes; idx++)
		{
			String word = tt[reversed ? indexRes - 1 - idx : idx].trim();
			line.append(word).append(' ');
			System.out.println(word);
		}
		text2.append(line.toString());
		indexRes = 0;
	}

	/**
	 * Forward maximum matching over {@code t[m, n)}.
	 *
	 * Starting at m, the longest trimmed prefix of the remaining text that is
	 * a dictionary word is taken as the next segment; when no prefix of two
	 * or more characters matches, a single character is taken. The segments
	 * are emitted in order once the range is exhausted. No empty segment is
	 * emitted when the range is consumed exactly.
	 *
	 * @param t the text to segment
	 * @param m start index (inclusive)
	 * @param n end index (exclusive)
	 */
	void Departword(String t,int m,int n)
	{
		while (n - m >= 2)
		{
			int end = n;
			// Shrink the candidate from the right until it is a word or a single character.
			while (end - m > 1 && !find(t.substring(m, end).trim()))
			{
				end --;
			}
			addSegment(t.substring(m, end));
			m = end;
		}
		if (m < n)
		{
			addSegment(t.substring(m, n)); // one character left over
		}
		flushSegments(false);
	}

	/**
	 * Reverse maximum matching over {@code t[m, n)}.
	 *
	 * Starting at n, the longest trimmed suffix of the remaining text that is
	 * a dictionary word is taken as the next segment; when no suffix of two
	 * or more characters matches, a single character is taken. Segments are
	 * collected back to front and emitted in reading order. No empty segment
	 * is emitted when the range is consumed exactly.
	 *
	 * @param t the text to segment
	 * @param m start index (inclusive)
	 * @param n end index (exclusive)
	 */
	void ReverseDepartword(String t,int m,int n)
	{
		while (n - m >= 2)
		{
			int start = m;
			// Shrink the candidate from the left until it is a word or a single character.
			while (n - start > 1 && !find(t.substring(start, n).trim()))
			{
				start ++;
			}
			addSegment(t.substring(start, n));
			n = start;
		}
		if (m < n)
		{
			addSegment(t.substring(m, n)); // one character left over
		}
		flushSegments(true);
	}

	/**
	 * Handles a click on either button: clears the output area, splits the
	 * input text on the delimiter characters and segments each piece with the
	 * algorithm that belongs to the clicked button.
	 *
	 * @param ee the button event
	 */
	public void actionPerformed(ActionEvent ee)
	{
		Object source = ee.getSource();
		boolean forward = (source == but1);
		if (!forward && source != but2)
		{
			return;
		}
		text2.setText(null);
		StringTokenizer fenxi = new StringTokenizer (text1.getText()," ,.;'\n'");
		while (fenxi.hasMoreTokens())
		{
			String str = fenxi.nextToken();
			if (forward)
			{
				Departword(str,0,str.length());
			}
			else
			{
				ReverseDepartword(str,0,str.length());
			}
		}
	}
}
public class myFenCi {
	/**
	 * Program entry point for the word-segmentation demo.
	 *
	 * Creates the dictionary object first and the segmentation window second;
	 * neither reference is needed afterwards.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		new ciku();
		new Window("分词窗口");
	}
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -