⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 process.java

📁 该软件实现了正向增字最大匹配和未登录词的识别。程序用 Java 语言编写，界面使用简单友好。
💻 JAVA
字号:
/*
 * Process.java
 *
 * Created on 2007年5月20日, 上午10:01
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

package cnu.nlp;

import java.util.*;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.NumberUtils;

/**
 * Segments an input string into word items by forward, grow-by-one-character
 * dictionary matching, then post-processes the items to merge person names,
 * Arabic and Chinese numbers, and ASCII letter runs.
 *
 * <p>The class is a singleton that keeps the current input and the current
 * result in instance fields with no synchronization, so one segmentation
 * must finish before the next input is set.
 *
 * <p>All dictionary decisions are delegated to {@code WordTable}; the exact
 * contents of its tables are not visible from this file.
 *
 * @author gnehzuil
 */
public class Process {

    private static final Process instance = new Process();

    /**
     * Returns the shared segmenter instance.
     *
     * @return the singleton {@code Process}
     */
    public static Process getInstance() {
        return instance;
    }

    private static final WordTable wordTable = WordTable.getInstance();

    /** Text to segment; {@code null} until {@link #setInputString} is called. */
    private String inputString;

    /** Items produced by the last {@link #doProcess()} call, in input order. */
    private final List<WordItem> result;

    /** Creates a new instance of Process */
    private Process() {
        result = new ArrayList<WordItem>();
    }

    /**
     * Stores the text to segment after trimming it and stripping newline
     * characters from both ends.
     *
     * @param inputString the raw text; may be {@code null}, in which case
     *                    {@link #doProcess()} yields an empty result
     */
    public void setInputString(String inputString) {
        inputString = StringUtils.trim(inputString);
        this.inputString = StringUtils.strip(inputString, "\n");
    }

    /**
     * Renders the current items as one string.
     *
     * @return every item's word followed by {@code "/ "}, or by
     *         {@code "/nr "} when the item is flagged unknown
     */
    public String getResult() {
        StringBuilder rendered = new StringBuilder();
        for (WordItem item : result) {
            rendered.append(item.getWord());
            rendered.append(item.isUnknown() ? "/nr " : "/ ");
        }
        return rendered.toString();
    }

    /**
     * Runs the full pipeline on the current input: dictionary segmentation,
     * then name, number and letter-run merging, in that order.
     */
    public void doProcess() {
        normalProcess();
        firstNameProcess();
        numberProcess();
        alphaProcess();
    }

    /**
     * Rebuilds {@code result} from the input by growing a candidate one
     * character at a time while the dictionary accepts it.
     *
     * <p>When growth fails, the last accepted candidate is emitted and the
     * rejected character is retried on its own; if it is rejected again it
     * is emitted as a single-character item. No empty items are emitted.
     */
    private void normalProcess() {
        result.clear();
        if (inputString == null) {
            // Nothing was ever set (or null was set): treat as empty input.
            return;
        }

        final int length = inputString.length();
        // True while the candidate is a lone character being retried after
        // a failed extension.
        boolean retryingSingleChar = false;
        int begin = 0;
        int pos = 1;

        while (pos <= length) {
            String candidate = inputString.substring(begin, pos);
            if (!wordTable.lookupLogonWord(candidate)) {
                if (retryingSingleChar) {
                    // The lone character is not in the dictionary either.
                    addSegment(candidate);
                    retryingSingleChar = false;
                    begin = pos;
                    ++pos;
                } else {
                    // Emit what matched before the last character. That
                    // span is empty when the very first character of a
                    // candidate fails, so skip it then.
                    if (begin < pos - 1) {
                        addSegment(inputString.substring(begin, pos - 1));
                    }
                    retryingSingleChar = true;
                    begin = pos - 1;
                }
            } else if (pos == length) {
                // Candidate matches and reaches the end of the input.
                addSegment(candidate);
                ++pos;
            } else {
                retryingSingleChar = false;
                ++pos;
            }
        }
    }

    /** Appends a new item for {@code word} with the unknown flag cleared. */
    private void addSegment(String word) {
        WordItem item = new WordItem();
        item.setWord(word);
        item.setUnknown(false);
        result.add(item);
    }

    /**
     * Replaces items {@code begin..end} (inclusive) by a single item at
     * {@code begin} whose word is their concatenation. Does nothing when the
     * range holds fewer than two items.
     */
    private void mergeItems(int begin, int end) {
        if (end <= begin) {
            return;
        }
        StringBuilder merged = new StringBuilder();
        for (int i = begin; i <= end; i++) {
            merged.append(result.get(i).getWord());
        }
        result.get(begin).setWord(merged.toString());
        result.subList(begin + 1, end + 1).clear();
    }

    /** Tells whether {@code word} passes the first-name or trans-name lookup. */
    private boolean isNameStart(String word) {
        return wordTable.lookupFirstName(word)
                || wordTable.lookupTransName(word);
    }

    /**
     * Tells whether {@code index} is inside {@code result} and holds a
     * one-character word accepted by the dictionary.
     */
    private boolean isSingleCharWord(int index) {
        if (index >= result.size()) {
            return false;
        }
        String word = result.get(index).getWord();
        return word.length() == 1 && wordTable.lookupLogonWord(word);
    }

    /**
     * Runs the three name passes over {@code result}: postfix-triggered,
     * prefix-triggered, then first-name-triggered merging.
     *
     * <p>Each pass re-reads {@code result.size()} every iteration because
     * the handlers remove items while the pass is running.
     */
    private void firstNameProcess() {
        // process postfix
        for (int i = 0; i < result.size(); i++) {
            if (wordTable.lookupPostfixFirstName(result.get(i).getWord())) {
                doPostfixFirstName(i);
            }
        }

        // process prefix
        for (int i = 0; i < result.size(); i++) {
            if (wordTable.lookupPrefixFirstName(result.get(i).getWord())) {
                doPrefixFirstName(i);
            }
        }

        // process firstname
        for (int i = 0; i < result.size(); i++) {
            if (wordTable.lookupFirstName(result.get(i).getWord())) {
                doFirstName(i);
            }
        }
    }

    /**
     * Handles a postfix word at {@code pos} by looking backwards for a
     * name-start item and merging from it into one item flagged unknown.
     *
     * <p>Spans of four, three and two items ending just before {@code pos}
     * are tried in that order; the first whose leading item is a name start
     * wins. Failing those, the item right before {@code pos} is merged with
     * the postfix itself.
     *
     * @param pos index of the postfix word in {@code result}
     */
    private void doPostfixFirstName(int pos) {
        for (int span = 4; span >= 2; span--) {
            int start = pos - span;
            if (start >= 0 && isNameStart(result.get(start).getWord())) {
                mergeItems(start, pos - 1);
                result.get(start).setUnknown(true);
                return;
            }
        }

        int previous = pos - 1;
        // NOTE(review): the longer spans accept an item passing the
        // first-name OR the trans-name lookup, while this case requires
        // both. Kept as in the original; confirm whether && is intended.
        if (previous >= 0
                && wordTable.lookupFirstName(result.get(previous).getWord())
                && wordTable.lookupTransName(result.get(previous).getWord())) {
            mergeItems(previous, pos);
            result.get(previous).setUnknown(true);
        }
    }

    /**
     * Handles a prefix word at {@code pos}: when the next item is a name
     * start, it is flagged unknown after absorbing up to two following
     * one-character dictionary words.
     *
     * @param pos index of the prefix word in {@code result}
     */
    private void doPrefixFirstName(int pos) {
        int start = pos + 1;
        if (start >= result.size()
                || !isNameStart(result.get(start).getWord())) {
            return;
        }

        int end = start;
        if (isSingleCharWord(pos + 2)) {
            end = pos + 2;
            if (isSingleCharWord(pos + 3)) {
                end = pos + 3;
            }
        }
        mergeItems(start, end);
        result.get(start).setUnknown(true);
    }

    /**
     * Merges runs of Arabic-digit items, then runs of Chinese number words.
     */
    private void numberProcess() {
        doArabicNumberProcess();

        for (int i = 0; i < result.size(); i++) {
            if (wordTable.lookupChineseNumberWord(result.get(i).getWord())) {
                doChNumberProcess(i);
            }
        }
    }

    /**
     * Finds each item made only of digits and merges the digit run that
     * starts there. The merge starts at the digit item itself so that the
     * item before it is never pulled into the number.
     */
    private void doArabicNumberProcess() {
        for (int i = 0; i < result.size(); i++) {
            if (NumberUtils.isDigits(result.get(i).getWord())) {
                doNumberProcess(i);
            }
        }
    }

    /**
     * Merges the item at {@code pos} with every directly following item
     * that consists only of digits.
     *
     * @param pos index of the first item of the run
     */
    private void doNumberProcess(int pos) {
        int end = pos;
        while (end + 1 < result.size()
                && NumberUtils.isDigits(result.get(end + 1).getWord())) {
            end++;
        }
        mergeItems(pos, end);
    }

    /**
     * Handles a first-name word at {@code pos}: when the next item is a
     * one-character dictionary word the two are merged and flagged unknown,
     * and a second such item is absorbed too unless it passes the
     * prep-word lookup.
     *
     * @param pos index of the first-name word in {@code result}
     */
    private void doFirstName(int pos) {
        if (!isSingleCharWord(pos + 1)) {
            return;
        }

        int end = pos + 1;
        if (isSingleCharWord(pos + 2)
                && !wordTable.lookupPrepWord(result.get(pos + 2).getWord())) {
            end = pos + 2;
        }
        mergeItems(pos, end);
        result.get(pos).setUnknown(true);
    }

    /**
     * Starts a letter-run merge at every item that is entirely
     * ASCII-printable.
     */
    private void alphaProcess() {
        for (int i = 0; i < result.size(); i++) {
            if (StringUtils.isAsciiPrintable(result.get(i).getWord())) {
                doAlphaProcess(i);
            }
        }
    }

    /**
     * Merges the item at {@code pos} with every directly following item
     * made only of ASCII letters.
     *
     * <p>{@code StringUtils.isAlpha} alone accepts any Unicode letter, CJK
     * characters included, so the ASCII check is needed to keep dictionary
     * words that follow a Latin run from being absorbed into it.
     *
     * @param pos index of the first item of the run
     */
    private void doAlphaProcess(int pos) {
        int end = pos;
        while (end + 1 < result.size()) {
            String next = result.get(end + 1).getWord();
            if (!StringUtils.isAsciiPrintable(next)
                    || !StringUtils.isAlpha(next)) {
                break;
            }
            end++;
        }
        mergeItems(pos, end);
    }

    /**
     * Merges the item at {@code pos} with every directly following item
     * that passes the Chinese-number lookup.
     *
     * @param pos index of the first item of the run
     */
    private void doChNumberProcess(int pos) {
        int end = pos;
        while (end + 1 < result.size()
                && wordTable
                        .lookupChineseNumberWord(result.get(end + 1).getWord())) {
            end++;
        }
        mergeItems(pos, end);
    }

}

/**
 * One entry of a segmentation result: the text of the segment together with
 * a flag telling whether the segment was marked as unknown.
 *
 * <p>A freshly created item has a {@code null} word and the flag cleared.
 */
class WordItem {

    /** Marker set through {@link #setUnknown(boolean)}; cleared by default. */
    private boolean unknown = false;

    /** Text of the segment; {@code null} until {@link #setWord(String)} is called. */
    private String word = null;

    /**
     * @return {@code true} when the item has been flagged unknown
     */
    public boolean isUnknown() {
        return this.unknown;
    }

    /**
     * @param unknown new value of the unknown flag
     */
    public void setUnknown(boolean unknown) {
        this.unknown = unknown;
    }

    /**
     * @return the segment text, or {@code null} if none was set
     */
    public String getWord() {
        return this.word;
    }

    /**
     * @param word the segment text to store
     */
    public void setWord(String word) {
        this.word = word;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -