⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fenci.java~13~

📁 英语的自动句子权值计算程序
💻 JAVA~13~
字号:
package abstrac;

import java.util.*;

/**
 * Word splitter for English text.
 *
 * <p>Breaks a string into lower-cased word tokens and reduces common plural
 * forms to a singular form using a small table of irregular nouns plus
 * suffix heuristics. The heuristics are purely lexical, so some non-plural
 * words are altered too (for example "gives" becomes "gif").
 */
public class Fenci {

    /** Token separators: whitespace, ASCII digits and ASCII punctuation. */
    private static final String DELIMITERS =
            " \t\n\r\f~!@#$%^&*()_+|`1234567890-=\\{}[]:\";'<>?,./'";

    /** Irregular plurals mapped to their singular form. */
    private static final Map<String, String> IRREGULAR = new HashMap<String, String>();

    static {
        IRREGULAR.put("feet", "foot");
        IRREGULAR.put("geese", "goose");
        IRREGULAR.put("lice", "louse");
        IRREGULAR.put("mice", "mouse");
        IRREGULAR.put("teeth", "tooth");
        IRREGULAR.put("oxen", "ox");
        IRREGULAR.put("children", "child");
    }

    /** Plurals in "-ves" whose singular ends in "-fe" rather than "-f". */
    private static final Set<String> FE_PLURALS = new HashSet<String>(
            Arrays.asList("knives", "wives", "lives"));

    /**
     * Words ending in "s" that are not plurals and must be kept as they are.
     * The misspelling "neverthless" is kept for backward compatibility next to
     * the correct "nevertheless".
     */
    private static final Set<String> NOT_PLURAL = new HashSet<String>(Arrays.asList(
            "as", "was", "besides", "is", "has", "this", "us", "unless",
            "nevertheless", "neverthless", "across", "towards", "thus", "always"));

    /**
     * Splits {@code source} into words and converts plurals to singular.
     *
     * <p>Tokens are separated by any character in {@link #DELIMITERS}, so digits
     * and punctuation never appear in the result. Every token is lower-cased
     * independently of the default locale.
     *
     * @param source the text to split; {@code null} is treated as empty text
     * @return the processed tokens in order of appearance; never {@code null}
     */
    public String[] fenci1(String source) {
        if (source == null) {
            return new String[0];
        }

        List<String> words = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(source, DELIMITERS);
        while (tokenizer.hasMoreTokens()) {
            // Locale.ROOT keeps the mapping stable (e.g. "I" -> "i") on every platform.
            String token = tokenizer.nextToken().toLowerCase(Locale.ROOT).trim();
            if (token.length() > 0) {
                words.add(singularize(token));
            }
        }
        return words.toArray(new String[0]);
    }

    /**
     * Reduces one lower-cased token to a singular form.
     *
     * <p>Rules are tried in order: irregular nouns, "-men", "-ies", "-ves",
     * "-oes/-ches/-shes/-ses/-xes", and finally a plain trailing "s".
     *
     * @param token a non-empty, lower-cased word
     * @return the singular form, or {@code token} itself when no rule applies
     */
    private static String singularize(String token) {
        String irregular = IRREGULAR.get(token);
        if (irregular != null) {
            return irregular;
        }

        int length = token.length();
        if (token.endsWith("men")) {
            return token.substring(0, length - 3) + "man";
        }
        if (token.endsWith("ies")) {
            return token.substring(0, length - 3) + "y";
        }
        if (token.endsWith("ves")) {
            String stem = token.substring(0, length - 3);
            return FE_PLURALS.contains(token) ? stem + "fe" : stem + "f";
        }
        if (token.endsWith("oes") || token.endsWith("ches") || token.endsWith("shes")
                || token.endsWith("ses") || token.endsWith("xes")) {
            return token.substring(0, length - 2);
        }
        // A lone "s" (e.g. the tail of "it's") is kept so no empty token is produced.
        if (length > 1 && token.endsWith("s") && !NOT_PLURAL.contains(token)) {
            return token.substring(0, length - 1);
        }
        return token;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -