⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeansclusterertest.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
字号:
package com.aliasi.test.unit.cluster;import com.aliasi.cluster.Clusterer;import com.aliasi.cluster.KMeansClusterer;import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;import com.aliasi.tokenizer.TokenFeatureExtractor;import com.aliasi.test.unit.BaseTestCase;import java.util.HashSet;import java.util.Random;import java.util.Set;public class KMeansClustererTest extends BaseTestCase {    static TokenFeatureExtractor FEATURE_EXTRACTOR        = new TokenFeatureExtractor(IndoEuropeanTokenizerFactory.FACTORY);    static String AA = "A A";    static String AAA = "A A A";    static String BBB = "B B B";    static String CCC = "C C C";    static String AAB = "A A B";    public void testZero() {        int numClusters = 0;        int maxIterations = 10;        try {            KMeansClusterer<String> clusterer                = new KMeansClusterer(FEATURE_EXTRACTOR,                                      numClusters,                                      maxIterations);            fail();        } catch (IllegalArgumentException e) {            succeed();        }        numClusters = 10;        maxIterations = -1;        try {            KMeansClusterer<String> clusterer                = new KMeansClusterer(FEATURE_EXTRACTOR,                                      numClusters,                                      maxIterations);            fail();        } catch (IllegalArgumentException e) {            succeed();        }    }    public void testOne() {        int numClusters = 1;        int maxIterations = 10;        KMeansClusterer<String> clusterer            = new KMeansClusterer(FEATURE_EXTRACTOR,                                  numClusters,                                  maxIterations);        assertCluster(clusterer,                      new String[] { },                      new String[][] { });        assertCluster(clusterer,                      new String[] { AAA },                      new String[][] { { AAA } });        assertCluster(clusterer,                      new String[] { AAA, BBB },                      new String[][] { { AAA, BBB } });        assertCluster(clusterer,                      new String[] { AAA, BBB, CCC },                      new String[][] { { AAA, BBB, CCC } });    }    public void testTwo() {        int numClusters = 2;        int maxIterations = 10;        KMeansClusterer<String> clusterer            = new KMeansClusterer(FEATURE_EXTRACTOR,                                  numClusters,                                  maxIterations);        assertCluster(clusterer,                      new String[] { },                      new String[][] { });        assertCluster(clusterer,                      new String[] { AAA },                      new String[][] { { AAA } });        assertCluster(clusterer,                      new String[] { AAA, BBB },                      new String[][] { { AAA }, { BBB } });        // all inits produce same result in 2 iterations        // { AAA, AAB }, { BBB }        // { AAA, BBB }, { AAB }        // { AAB, BBB }, { AAA }        assertCluster(clusterer,                      new String[] { AA, AAB, BBB },                      new String[][] { { AA, AAB }, { BBB } });    }    public void testRandom() {        Random random = new Random();        int numInputs = 10; //random.nextInt(100) + 50;        Set<String> inputSet = new HashSet<String>();        for (int n = 0; n < numInputs; ++n) {            StringBuilder sb = new StringBuilder();            int length = random.nextInt(20) + 5;            int numChars = 2; // random.nextInt(16);            for (int i = 0; i < length; ++i) {                if (i > 0) sb.append(' ');                char c = (char) ( ((int)'a') + random.nextInt(numChars));                sb.append(c);            }            inputSet.add(sb.toString());        }        int maxIterations = 10; // random.nextInt(50) + 50;        int numClusters = 3; // random.nextInt(10) + 5;        KMeansClusterer<String> clusterer            = new KMeansClusterer(FEATURE_EXTRACTOR,                                  numClusters,                                  maxIterations);        Set<Set<String>> clustering =            clusterer.cluster(inputSet);        assertCovers(clustering,inputSet);    }    void assertCovers(Set<Set<String>> clustering, Set<String> elts) {        Set<String> clusteringElts = new HashSet<String>();        for (Set<String> cluster : clustering) {            assertTrue(cluster.size() > 0);            clusteringElts.addAll(cluster);        }        assertEquals(elts,clusteringElts);    }    void assertCluster(Clusterer clusterer,                       String[] inputs,                       String[][] expectedClusters) {        Set<Set<String>> expectedClustering            = new HashSet<Set<String>>();        for (int i = 0; i < expectedClusters.length; ++i)            expectedClustering.add(toSet(expectedClusters[i]));        Set<String> inputSet = toSet(inputs);        Set<Set<String>> clustering = clusterer.cluster(inputSet);        assertEquals(expectedClustering,clustering);    }    static Set<String> toSet(String[] xs) {        Set<String> result = new HashSet<String>();        for (int i = 0; i < xs.length; ++i)            result.add(xs[i]);        return result;    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -