📄 mmseg.java
字号:
/**
*
*/
package org.solol.mmseg.core;
/**
 * Static entry point that splits text into words using the algorithms
 * produced by {@code AlgorithmFactory}.
 *
 * <p>The factory is obtained once, when this class is loaded. If that lookup
 * fails, the failure is remembered and reported by the first call to
 * {@code segment} instead of surfacing later as an unexplained
 * {@code NullPointerException}.
 *
 * @author solo L
 */
public class MMSeg {

    /** Factory used to create an algorithm per call; {@code null} if initialization failed. */
    private static final AlgorithmFactory factory;

    /** Cause of a failed factory lookup, kept so it can be attached to later errors. */
    private static final ConfigurationException initFailure;

    static {
        AlgorithmFactory created = null;
        ConfigurationException failure = null;
        try {
            created = AlgorithmFactory.getFactory();
        } catch (ConfigurationException e) {
            // Keep the original diagnostic output, but also remember the cause
            // so that segment() can report it instead of failing on a null factory.
            e.printStackTrace();
            failure = e;
        }
        factory = created;
        initFailure = failure;
    }

    /**
     * Segments {@code content} using the {@code Config.SINGLE_WORD} algorithm type.
     *
     * @param content the text to segment; must not be {@code null}
     * @return the segmented text, as produced by {@link #segment(String, String)}
     * @throws AlgorithmException propagated from the underlying factory or algorithm
     */
    public static String segment(String content) throws AlgorithmException {
        return segment(content, Config.SINGLE_WORD);
    }

    /**
     * Segments {@code content} using the algorithm created for {@code type}.
     *
     * <p>The result is the value of every word returned by the algorithm, each
     * one followed by {@code Config.SEPARATOR} (so the separator also trails
     * the last word). If the algorithm yields no words the result is empty.
     *
     * @param content the text to segment; must not be {@code null}
     * @param type    the algorithm type passed to the factory, e.g.
     *                {@code Config.SINGLE_WORD} or {@code Config.THREE_WORD}
     * @return the word values joined and terminated by {@code Config.SEPARATOR}
     * @throws AlgorithmException    propagated from the underlying factory or algorithm
     * @throws NullPointerException  if {@code content} is {@code null}
     * @throws IllegalStateException if the algorithm factory could not be
     *                               initialized when this class was loaded
     */
    public static String segment(String content, String type)
            throws AlgorithmException {
        if (content == null) {
            throw new NullPointerException("content must not be null");
        }
        if (factory == null) {
            throw new IllegalStateException(
                    "MMSeg algorithm factory is not available; initialization failed",
                    initFailure);
        }

        char[] chars = content.toCharArray();
        // Local, single-threaded accumulator: StringBuilder avoids needless locking.
        StringBuilder sb = new StringBuilder();
        IAlgorithm algorithm = factory.createAlgorithm(type);
        //Set your AmbiguityHandler here.
        //algorithm.setAmbiguityHandler(ambiguityHandler);

        // The algorithm is asked repeatedly for the next word over the same
        // character array until it signals the end by returning null.
        for (IWord word = algorithm.next(chars); word != null; word = algorithm.next(chars)) {
            sb.append(word.getValue());
            sb.append(Config.SEPARATOR);
        }
        return sb.toString();
    }

    /**
     * Small manual demo: segments one sample sentence with
     * {@code Config.THREE_WORD} and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Sample inputs previously used here for manual checks. Each was run as
        // "Simple ->" via segment(text) and "Complex->" via
        // segment(text, Config.THREE_WORD), unless noted otherwise:
        //   MMRule:       "眼看就要来了", "眼看就要来", "眼看就要", "眼看就"
        //   LAWLRule:     "国际化"
        //   SVWLRule:     "研究生命起源"
        //   LSDMFOCWRule: "是一个"
        //   General:
        //     "我刚刚开始山地自行车运动的时候,我更愿意选用零件尽可能少尽可能简单的自行车。"
        //     "资源描述框架是一个用于表达关于万维网上的资源的信息的语言。"
        //     "RDF用于信息需要被应用程序处理而不是仅仅显示给人观看的场合。"
        //     "RDF提供了一种基于XML的语法(称为RDF/XML)用于保存和交换RDF图。"
        //     "2007年08月08日"
        //     "http://www.example.org/index.html has a creation-date whose value is August 16, 1999."
        //     "和服务于三日后裁制完毕,并呈送将军府中。"
        //     "王府饭店的设施和服务是一流的。"
        //     "提高人民生活水平"
        //     "在这些企业中国有企业有十个。"
        //     "在这些企业中中国企业有十个。" (Simple only)
        try {
            System.out.println("Complex->"
                    + MMSeg.segment("「在这些企业中中国企业有十个」",
                            Config.THREE_WORD));
        } catch (AlgorithmException e) {
            e.printStackTrace();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -