⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 soundextest.java

📁 一个很实用的东东
💻 JAVA
字号:
/*
 * Copyright 2001-2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */ 

// (FYI: Formatted and sorted with Eclipse)
package org.apache.commons.codec.language;

import junit.framework.Test;
import junit.framework.TestSuite;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.StringEncoderAbstractTest;

/**
 * Tests {@link Soundex}
 * 
 * @version $Id: SoundexTest.java,v 1.18 2004/06/02 00:55:38 ggregory Exp $
 * @author Apache Software Foundation
 */
public class SoundexTest extends StringEncoderAbstractTest {

    public static Test suite() {
        return (new TestSuite(SoundexTest.class));
    }

    private Soundex encoder = null;

    public SoundexTest(String name) {
        super(name);
    }

    void encodeAll(String[] strings, String expectedEncoding) {
        for (int i = 0; i < strings.length; i++) {
            assertEquals(expectedEncoding, this.getEncoder().encode(strings[i]));
        }
    }
    
    /**
	 * @return Returns the _encoder.
	 */
    public Soundex getEncoder() {
        return this.encoder;
    }

    protected StringEncoder makeEncoder() {
        return new Soundex();
    }

    /**
	 * @param encoder
	 *                  The encoder to set.
	 */
    public void setEncoder(Soundex encoder) {
        this.encoder = encoder;
    }

    public void setUp() throws Exception {
        super.setUp();
        this.setEncoder(new Soundex());
    }

    public void tearDown() throws Exception {
        super.tearDown();
        this.setEncoder(null);
    }

    public void testB650() {
        this.encodeAll(
            new String[] {
                "BARHAM",
                "BARONE",
                "BARRON",
                "BERNA",
                "BIRNEY",
                "BIRNIE",
                "BOOROM",
                "BOREN",
                "BORN",
                "BOURN",
                "BOURNE",
                "BOWRON",
                "BRAIN",
                "BRAME",
                "BRANN",
                "BRAUN",
                "BREEN",
                "BRIEN",
                "BRIM",
                "BRIMM",
                "BRINN",
                "BRION",
                "BROOM",
                "BROOME",
                "BROWN",
                "BROWNE",
                "BRUEN",
                "BRUHN",
                "BRUIN",
                "BRUMM",
                "BRUN",
                "BRUNO",
                "BRYAN",
                "BURIAN",
                "BURN",
                "BURNEY",
                "BYRAM",
                "BYRNE",
                "BYRON",
                "BYRUM" },
            "B650");
    }

    public void testDifference() throws EncoderException {
        // Edge cases
        assertEquals(0, this.getEncoder().difference(null, null));
        assertEquals(0, this.getEncoder().difference("", ""));
        assertEquals(0, this.getEncoder().difference(" ", " "));
        // Normal cases
        assertEquals(4, this.getEncoder().difference("Smith", "Smythe"));
        assertEquals(2, this.getEncoder().difference("Ann", "Andrew"));
        assertEquals(1, this.getEncoder().difference("Margaret", "Andrew"));
        assertEquals(0, this.getEncoder().difference("Janet", "Margaret"));
        // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
        assertEquals(4, this.getEncoder().difference("Green", "Greene"));
        assertEquals(0, this.getEncoder().difference("Blotchet-Halls", "Greene"));
        // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
        assertEquals(4, this.getEncoder().difference("Smith", "Smythe"));
        assertEquals(4, this.getEncoder().difference("Smithers", "Smythers"));
        assertEquals(2, this.getEncoder().difference("Anothers", "Brothers"));        
    }

    public void testEncodeBasic() {
        assertEquals("T235", this.getEncoder().encode("testing"));
        assertEquals("T000", this.getEncoder().encode("The"));
        assertEquals("Q200", this.getEncoder().encode("quick"));
        assertEquals("B650", this.getEncoder().encode("brown"));
        assertEquals("F200", this.getEncoder().encode("fox"));
        assertEquals("J513", this.getEncoder().encode("jumped"));
        assertEquals("O160", this.getEncoder().encode("over"));
        assertEquals("T000", this.getEncoder().encode("the"));
        assertEquals("L200", this.getEncoder().encode("lazy"));
        assertEquals("D200", this.getEncoder().encode("dogs"));
    }

    /**
	 * Examples from
	 * http://www.bradandkathy.com/genealogy/overviewofsoundex.html
	 */
    public void testEncodeBatch2() {
        assertEquals("A462", this.getEncoder().encode("Allricht"));
        assertEquals("E166", this.getEncoder().encode("Eberhard"));
        assertEquals("E521", this.getEncoder().encode("Engebrethson"));
        assertEquals("H512", this.getEncoder().encode("Heimbach"));
        assertEquals("H524", this.getEncoder().encode("Hanselmann"));
        assertEquals("H431", this.getEncoder().encode("Hildebrand"));
        assertEquals("K152", this.getEncoder().encode("Kavanagh"));
        assertEquals("L530", this.getEncoder().encode("Lind"));
        assertEquals("L222", this.getEncoder().encode("Lukaschowsky"));
        assertEquals("M235", this.getEncoder().encode("McDonnell"));
        assertEquals("M200", this.getEncoder().encode("McGee"));
        assertEquals("O155", this.getEncoder().encode("Opnian"));
        assertEquals("O155", this.getEncoder().encode("Oppenheimer"));
        assertEquals("R355", this.getEncoder().encode("Riedemanas"));
        assertEquals("Z300", this.getEncoder().encode("Zita"));
        assertEquals("Z325", this.getEncoder().encode("Zitzmeinn"));
    }

    /**
	 * Examples from
	 * http://www.archives.gov/research_room/genealogy/census/soundex.html
	 */
    public void testEncodeBatch3() {
        assertEquals("W252", this.getEncoder().encode("Washington"));
        assertEquals("L000", this.getEncoder().encode("Lee"));
        assertEquals("G362", this.getEncoder().encode("Gutierrez"));
        assertEquals("P236", this.getEncoder().encode("Pfister"));
        assertEquals("J250", this.getEncoder().encode("Jackson"));
        assertEquals("T522", this.getEncoder().encode("Tymczak"));
        // For VanDeusen: D-250 (D, 2 for the S, 5 for the N, 0 added) is also
        // possible.
        assertEquals("V532", this.getEncoder().encode("VanDeusen"));
    }

    /**
	 * Examples from: http://www.myatt.demon.co.uk/sxalg.htm
	 */
    public void testEncodeBatch4() {
        assertEquals("H452", this.getEncoder().encode("HOLMES"));
        assertEquals("A355", this.getEncoder().encode("ADOMOMI"));
        assertEquals("V536", this.getEncoder().encode("VONDERLEHR"));
        assertEquals("B400", this.getEncoder().encode("BALL"));
        assertEquals("S000", this.getEncoder().encode("SHAW"));
        assertEquals("J250", this.getEncoder().encode("JACKSON"));
        assertEquals("S545", this.getEncoder().encode("SCANLON"));
        assertEquals("S532", this.getEncoder().encode("SAINTJOHN"));

    }

	public void testBadCharacters() {
		assertEquals("H452", this.getEncoder().encode("HOL>MES"));

	}

    public void testEncodeIgnoreApostrophes() {
        this.encodeAll(new String[] { "OBrien", "'OBrien", "O'Brien", "OB'rien", "OBr'ien", "OBri'en", "OBrie'n", "OBrien'" }, "O165");
    }

    /**
	 * Test data from http://www.myatt.demon.co.uk/sxalg.htm
	 */
    public void testEncodeIgnoreHyphens() {
        this.encodeAll(
            new String[] {
                "KINGSMITH",
                "-KINGSMITH",
                "K-INGSMITH",
                "KI-NGSMITH",
                "KIN-GSMITH",
                "KING-SMITH",
                "KINGS-MITH",
                "KINGSM-ITH",
                "KINGSMI-TH",
                "KINGSMIT-H",
                "KINGSMITH-" },
            "K525");
    }

    public void testEncodeIgnoreTrimmable() {
        assertEquals("W252", this.getEncoder().encode(" \t\n\r Washington \t\n\r "));
    }

    /**
	 * Consonants from the same code group separated by W or H are treated as
	 * one.
	 */
    public void testHWRuleEx1() {
        // From
        // http://www.archives.gov/research_room/genealogy/census/soundex.html:
        // Ashcraft is coded A-261 (A, 2 for the S, C ignored, 6 for the R, 1
        // for the F). It is not coded A-226.
        assertEquals("A261", this.getEncoder().encode("Ashcraft"));
    }

    /**
	 * Consonants from the same code group separated by W or H are treated as
	 * one.
	 * 
	 * Test data from http://www.myatt.demon.co.uk/sxalg.htm
	 */
    public void testHWRuleEx2() {
        assertEquals("B312", this.getEncoder().encode("BOOTHDAVIS"));
        assertEquals("B312", this.getEncoder().encode("BOOTH-DAVIS"));
    }

    /**
	 * Consonants from the same code group separated by W or H are treated as
	 * one.
	 */
    public void testHWRuleEx3() {
        assertEquals("S460", this.getEncoder().encode("Sgler"));
        assertEquals("S460", this.getEncoder().encode("Swhgler"));
        // Also S460:
        this.encodeAll(
            new String[] {
                "SAILOR",
                "SALYER",
                "SAYLOR",
                "SCHALLER",
                "SCHELLER",
                "SCHILLER",
                "SCHOOLER",
                "SCHULER",
                "SCHUYLER",
                "SEILER",
                "SEYLER",
                "SHOLAR",
                "SHULER",
                "SILAR",
                "SILER",
                "SILLER" },
            "S460");
    }

    public void testMaxLength() throws Exception {
        Soundex soundex = new Soundex();
        soundex.setMaxLength(soundex.getMaxLength());
        assertEquals("S460", this.getEncoder().encode("Sgler"));
    }

    public void testMaxLengthLessThan3Fix() throws Exception {
        Soundex soundex = new Soundex();
        soundex.setMaxLength(2);
        assertEquals("S460", soundex.encode("SCHELLER"));
    }

    /**
     * Examples for MS SQLServer from
     * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
     */
    public void testMsSqlServer1() {
        assertEquals("S530", this.getEncoder().encode("Smith"));
        assertEquals("S530", this.getEncoder().encode("Smythe"));
    }

    /**
     * Examples for MS SQLServer from
     * http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support/kb/articles/Q100/3/65.asp&NoWebContent=1
     */
    public void testMsSqlServer2() {
        this.encodeAll(new String[]{"Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen"}, "E625");
    }
    /**
     * Examples for MS SQLServer from
     * http://databases.about.com/library/weekly/aa042901a.htm
     */
    public void testMsSqlServer3() {
        assertEquals("A500", this.getEncoder().encode("Ann"));
        assertEquals("A536", this.getEncoder().encode("Andrew"));
        assertEquals("J530", this.getEncoder().encode("Janet"));
        assertEquals("M626", this.getEncoder().encode("Margaret"));
        assertEquals("S315", this.getEncoder().encode("Steven"));
        assertEquals("M240", this.getEncoder().encode("Michael"));
        assertEquals("R163", this.getEncoder().encode("Robert"));
        assertEquals("L600", this.getEncoder().encode("Laura"));
        assertEquals("A500", this.getEncoder().encode("Anne"));
    }

    /**
     * Fancy characters are not mapped by the default US mapping.
     * 
     * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=29080
     */
    public void testUsMappingOWithDiaeresis() {
        assertEquals("O000", this.getEncoder().encode("o"));
        try {
            assertEquals("

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -