📄 doublemetaphone.java
字号:
return index + 2;
} else if (conditionCH1(value, index)) {
//-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
result.append('K');
return index + 2;
} else {
if (index > 0) {
if (contains(value, 0, 2, "MC")) {
result.append('K');
} else {
result.append('X', 'K');
}
} else {
result.append('X');
}
return index + 2;
}
}
/**
 * Handles 'D' cases.
 *
 * @param value  the string being encoded
 * @param result accumulator that receives the code for this 'D'
 * @param index  position of the 'D' within {@code value}
 * @return the index at which scanning should continue
 */
private int handleD(String value,
                    DoubleMetaphoneResult result,
                    int index) {
    if (!contains(value, index, 2, "DG")) {
        //-- a plain 'D' codes as 'T'; "DT" and "DD" are consumed as a pair --//
        result.append('T');
        return contains(value, index, 2, "DT", "DD") ? index + 2 : index + 1;
    }

    if (contains(value, index + 2, 1, "I", "E", "Y")) {
        //-- "Edge": "DG" + I/E/Y, all three letters are consumed --//
        result.append('J');
        return index + 3;
    }

    //-- "Edgar": only the "DG" pair is consumed --//
    result.append("TK");
    return index + 2;
}
/**
 * Handles 'G' cases.
 *
 * Branches are tried in order: "GH" (delegated to {@code handleGH}), "GN",
 * "GLI", a set of two-letter continuations at the start of the word,
 * "-ger-"/"-gy-", a following E/I/Y (or "AGGI"/"OGGI"), a doubled "GG", and
 * finally a plain 'G'. Every branch except the plain 'G' advances by two.
 *
 * @param value         the string being encoded
 * @param result        accumulator that receives the code(s) for this 'G'
 * @param index         position of the 'G' within {@code value}
 * @param slavoGermanic flag supplied by the caller; when set, several
 *                      non-Germanic alternatives below are suppressed
 * @return the index at which scanning should continue
 */
private int handleG(String value,
                    DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic) {
    if (charAt(value, index + 1) == 'H') {
        index = handleGH(value, result, index);
    } else if (charAt(value, index + 1) == 'N') {
        if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
            result.append("KN", "N");
        } else if (!contains(value, index + 2, 2, "EY") &&
                   // NOTE(review): always true here, since this branch is only
                   // entered when the character at index + 1 is 'N'. Kept as-is
                   // to preserve behaviour.
                   charAt(value, index + 1) != 'Y' && !slavoGermanic) {
            result.append("N", "KN");
        } else {
            result.append("KN");
        }
        index = index + 2;
    } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
        result.append("KL", "L");
        index += 2;
    } else if (index == 0 &&
               (charAt(value, index + 1) == 'Y' ||
                contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
        //-- -ges-, -gep-, -gel-, -gie- at beginning --//
        result.append('K', 'J');
        index += 2;
    } else if ((contains(value, index + 1, 2, "ER") ||
                charAt(value, index + 1) == 'Y') &&
               !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
               !contains(value, index - 1, 1, "E", "I") &&
               !contains(value, index - 1, 3, "RGY", "OGY")) {
        //-- -ger-, -gy- --//
        result.append('K', 'J');
        index += 2;
    } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
               contains(value, index - 1, 4, "AGGI", "OGGI")) {
        //-- Italian "biaggi" --//
        if (contains(value, 0, 4, "VAN ", "VON ") ||
            contains(value, 0, 3, "SCH") ||
            contains(value, index + 1, 2, "ET")) {
            //-- obvious germanic --//
            result.append('K');
        } else if (contains(value, index + 1, 3, "IER")) {
            //-- the window length must equal the criterion length: the      --//
            //-- previous length of 4 was compared against the 3-letter      --//
            //-- "IER", which presumably never matched (contains() appears   --//
            //-- to compare a fixed-length window with each criterion)       --//
            result.append('J');
        } else {
            result.append('J', 'K');
        }
        index += 2;
    } else if (charAt(value, index + 1) == 'G') {
        //-- doubled "GG" is consumed as one 'K' --//
        index += 2;
        result.append('K');
    } else {
        index++;
        result.append('K');
    }
    return index;
}
/**
 * Handles 'GH' cases.
 *
 * Every path consumes exactly the two letters "GH"; the branches differ only
 * in what, if anything, is appended to {@code result}.
 *
 * @param value  the string being encoded
 * @param result accumulator that receives the code for this "GH"
 * @param index  position of the 'G' within {@code value}
 * @return {@code index + 2}
 */
private int handleGH(String value,
                     DoubleMetaphoneResult result,
                     int index) {
    final int next = index + 2;

    //-- "GH" after a non-vowel --//
    if (index > 0 && !isVowel(charAt(value, index - 1))) {
        result.append('K');
        return next;
    }

    //-- word-initial "GH": 'J' before 'I', otherwise 'K' --//
    if (index == 0) {
        result.append(charAt(value, index + 2) == 'I' ? 'J' : 'K');
        return next;
    }

    //-- Parker's rule (with some further refinements) - "hugh": nothing is appended --//
    boolean silent =
        (index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
        (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
        (index > 3 && contains(value, index - 4, 1, "B", "H"));
    if (silent) {
        return next;
    }

    if (index > 2 && charAt(value, index - 1) == 'U' &&
        contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
        //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough" --//
        result.append('F');
    } else if (index > 0 && charAt(value, index - 1) != 'I') {
        result.append('K');
    }
    return next;
}
/**
 * Handles 'H' cases.
 *
 * @param value  the string being encoded
 * @param result accumulator that receives 'H' when the letter is kept
 * @param index  position of the 'H' within {@code value}
 * @return the index at which scanning should continue
 */
private int handleH(String value,
                    DoubleMetaphoneResult result,
                    int index) {
    //-- only keep if first & before vowel or between 2 vowels --//
    boolean firstOrAfterVowel = index == 0 || isVowel(charAt(value, index - 1));
    if (firstOrAfterVowel && isVowel(charAt(value, index + 1))) {
        result.append('H');
        //-- step over the 'H' and the vowel that follows it --//
        return index + 2;
    }
    //-- otherwise the 'H' is dropped and only this one letter is consumed --//
    return index + 1;
}
/**
 * Handles 'J' cases.
 *
 * @param value         the string being encoded
 * @param result        accumulator that receives the code(s) for this 'J'
 * @param index         position of the 'J' within {@code value}
 * @param slavoGermanic flag supplied by the caller; when set, the 'J'/'H'
 *                      pairing between vowels is not used
 * @return the index at which scanning should continue
 */
private int handleJ(String value, DoubleMetaphoneResult result, int index,
                    boolean slavoGermanic) {
    if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
        //-- obvious Spanish, "Jose", "San Jacinto" --//
        boolean joseAsWholeWord =
            (index == 0 && charAt(value, index + 4) == ' ') || value.length() == 4;
        if (joseAsWholeWord || contains(value, 0, 4, "SAN ")) {
            result.append('H');
        } else {
            result.append('J', 'H');
        }
        return index + 1;
    }

    if (index == 0 && !contains(value, index, 4, "JOSE")) {
        //-- word-initial 'J' --//
        result.append('J', 'A');
    } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
               (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
        //-- vowel + 'J' + A/O --//
        result.append('J', 'H');
    } else if (index == value.length() - 1) {
        //-- word-final 'J' --//
        result.append('J', ' ');
    } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
               !contains(value, index - 1, 1, "S", "K", "L")) {
        result.append('J');
    }

    //-- a doubled "JJ" is consumed as a pair --//
    return charAt(value, index + 1) == 'J' ? index + 2 : index + 1;
}
/**
 * Handles 'L' cases.
 *
 * @param value  the string being encoded
 * @param result accumulator; always receives 'L', and additionally a blank
 *               via {@code appendAlternate} when "LL" satisfies
 *               {@code conditionL0}
 * @param index  position of the 'L' within {@code value}
 * @return the index at which scanning should continue
 */
private int handleL(String value,
                    DoubleMetaphoneResult result,
                    int index) {
    result.append('L');

    if (charAt(value, index + 1) != 'L') {
        return index + 1;
    }

    //-- doubled "LL": consumed as a pair --//
    if (conditionL0(value, index)) {
        result.appendAlternate(' ');
    }
    return index + 2;
}
/**
 * Handles 'P' cases.
 *
 * @param value  the string being encoded
 * @param result accumulator that receives 'F' for "PH" and 'P' otherwise
 * @param index  position of the 'P' within {@code value}
 * @return the index at which scanning should continue
 */
private int handleP(String value,
                    DoubleMetaphoneResult result,
                    int index) {
    if (charAt(value, index + 1) == 'H') {
        //-- "PH" --//
        result.append('F');
        return index + 2;
    }

    result.append('P');
    //-- a following 'P' or 'B' is consumed together with this 'P' --//
    if (contains(value, index + 1, 1, "P", "B")) {
        return index + 2;
    }
    return index + 1;
}
/**
 * Handles 'R' cases.
 *
 * @param value         the string being encoded
 * @param result        accumulator that receives 'R', either through
 *                      {@code append} or through {@code appendAlternate}
 * @param index         position of the 'R' within {@code value}
 * @param slavoGermanic flag supplied by the caller; when set, the
 *                      {@code appendAlternate} path is never taken
 * @return the index at which scanning should continue
 */
private int handleR(String value,
                    DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic) {
    //-- final 'R' after "IE", unless "ME"/"MA" sits two letters before that --//
    boolean finalAfterIe =
        index == value.length() - 1 && !slavoGermanic &&
        contains(value, index - 2, 2, "IE") &&
        !contains(value, index - 4, 2, "ME", "MA");

    if (finalAfterIe) {
        result.appendAlternate('R');
    } else {
        result.append('R');
    }

    //-- a doubled "RR" is consumed as a pair --//
    int step = charAt(value, index + 1) == 'R' ? 2 : 1;
    return index + step;
}
/**
 * Handles 'S' cases.
 *
 * The special cases are checked in a fixed order and the first match wins;
 * "SC" is delegated to {@code handleSC}.
 *
 * @param value         the string being encoded
 * @param result        accumulator that receives the code(s) for this 'S'
 * @param index         position of the 'S' within {@code value}
 * @param slavoGermanic flag supplied by the caller; affects only the
 *                      "SIO"/"SIA" branch
 * @return the index at which scanning should continue
 */
private int handleS(String value,
                    DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic) {
    //-- special cases "island", "isle", "carlisle", "carlysle": nothing appended --//
    if (contains(value, index - 1, 3, "ISL", "YSL")) {
        return index + 1;
    }

    //-- special case "sugar-" --//
    if (index == 0 && contains(value, index, 5, "SUGAR")) {
        result.append('X', 'S');
        return index + 1;
    }

    if (contains(value, index, 2, "SH")) {
        //-- germanic endings keep 'S', everything else is 'X' --//
        boolean germanic = contains(value, index + 1, 4,
                                    "HEIM", "HOEK", "HOLM", "HOLZ");
        result.append(germanic ? 'S' : 'X');
        return index + 2;
    }

    //-- Italian and Armenian --//
    if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
        if (slavoGermanic) {
            result.append('S');
        } else {
            result.append('S', 'X');
        }
        return index + 3;
    }

    //-- german & anglicisations, e.g. "smith" match "schmidt",     --//
    //-- "snider" match "schneider"                                  --//
    //-- also, -sz- in slavic language altho in hungarian it         --//
    //-- is pronounced "s"                                           --//
    if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) ||
        contains(value, index + 1, 1, "Z")) {
        result.append('S', 'X');
        return contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
    }

    if (contains(value, index, 2, "SC")) {
        return handleSC(value, result, index);
    }

    //-- french e.g. "resnais", "artois" --//
    boolean finalAfterAiOi =
        index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI");
    if (finalAfterAiOi) {
        result.appendAlternate('S');
    } else {
        result.append('S');
    }

    //-- a following 'S' or 'Z' is consumed together with this 'S' --//
    return contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
}
/**
 * Handles 'SC' cases.
 *
 * Every path consumes three letters (the "SC" plus the letter after it).
 *
 * @param value  the string being encoded
 * @param result accumulator that receives the code(s) for this "SC"
 * @param index  position of the 'S' within {@code value}
 * @return {@code index + 3}
 */
private int handleSC(String value,
                     DoubleMetaphoneResult result,
                     int index) {
    final int next = index + 3;

    if (charAt(value, index + 2) != 'H') {
        //-- "SCI", "SCE", "SCY" give 'S'; any other "SC" gives "SK" --//
        if (contains(value, index + 2, 1, "I", "E", "Y")) {
            result.append('S');
        } else {
            result.append("SK");
        }
        return next;
    }

    //-- Schlesinger's rule --//
    if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
        //-- Dutch origin, e.g. "school", "schooner" --//
        if (contains(value, index + 3, 2, "ER", "EN")) {
            //-- "schermerhorn", "schenker" --//
            result.append("X", "SK");
        } else {
            result.append("SK");
        }
        return next;
    }

    //-- word-initial "SCH" followed by neither a vowel nor 'W' --//
    if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
        result.append('X', 'S');
    } else {
        result.append('X');
    }
    return next;
}
/**
 * Handles 'T' cases.
 *
 * @param value  the string being encoded
 * @param result accumulator that receives the code(s) for this 'T'
 * @param index  position of the 'T' within {@code value}
 * @return the index at which scanning should continue
 */
private int handleT(String value,
                    DoubleMetaphoneResult result,
                    int index) {
    //-- "TION", "TIA" and "TCH" all give 'X' and consume three letters --//
    if (contains(value, index, 4, "TION") ||
        contains(value, index, 3, "TIA", "TCH")) {
        result.append('X');
        return index + 3;
    }

    if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
        //-- special case "thomas", "thames" or germanic --//
        boolean plainT =
            contains(value, index + 2, 2, "OM", "AM") ||
            contains(value, 0, 4, "VAN ", "VON ") ||
            contains(value, 0, 3, "SCH");
        if (plainT) {
            result.append('T');
        } else {
            result.append('0', 'T');
        }
        return index + 2;
    }

    result.append('T');
    //-- a following 'T' or 'D' is consumed together with this 'T' --//
    if (contains(value, index + 1, 1, "T", "D")) {
        return index + 2;
    }
    return index + 1;
}
/**
* Handles 'W' cases
*/
private int handleW(String value,
DoubleMetaphoneResult result,
int index) {
if (contains(value, index, 2, "WR")) {
//-- can also be in middle of word --//
result.append('R');
index += 2;
} else {
if (index == 0 && (isVowel(charAt(value, index + 1)) ||
contains(value, index, 2, "WH"))) {
if (isVowel(charAt(value, index + 1))) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -