📄 utility.java
字号:
* @param b2
* @return 返回第一次出现在位置。如果没有出现,则返回-1
*/
public static int strstr(byte[] b1, byte[] b2) {
    // Finds the first position at which the byte sequence b2 occurs inside b1.
    // Returns -1 when either array is null, when b2 is empty, or when b2 does
    // not occur in b1.
    if (b1 == null || b2 == null || b2.length == 0) {
        return -1;
    }
    // Last start offset at which b2 still fits completely inside b1.
    int lastStart = b1.length - b2.length;
    for (int i = 0; i <= lastStart; i++) {
        // The match state is tracked per candidate offset so that a failed
        // partial match cannot suppress a later, genuine match.
        boolean matched = true;
        for (int j = 0; j < b2.length; j++) {
            if (b1[i + j] != b2[j]) {
                matched = false;
                break;
            }
        }
        if (matched) {
            return i;
        }
    }
    return -1;
}
/**
 * Locates the first occurrence of a byte value in an array.
 *
 * @param bs the array to scan; may be null
 * @param b  the byte value to look for
 * @return the index of the first element equal to {@code b}, or -1 when
 *         {@code bs} is null or contains no such element
 */
public static int strchr(byte[] bs, byte b) {
    if (bs == null) {
        return -1;
    }
    int pos = 0;
    while (pos < bs.length) {
        if (bs[pos] == b) {
            return pos;
        }
        pos++;
    }
    return -1;
}
/**
 * Compares two byte arrays position by position over the index range
 * {@code [startIndex, len)}.
 *
 * NOTE(review): the method returns {@code true} when a DIFFERENCE is found
 * and {@code false} when the compared bytes are all equal, which is the
 * opposite of an "are they equal" predicate - confirm that callers expect
 * this convention before changing it.
 *
 * @param b1         first array; may be null
 * @param startIndex first index compared, applied to both arrays
 * @param b2         second array; may be null
 * @param len        exclusive upper index of the comparison; both arrays
 *                   must be at least this long for any comparison to happen
 * @return {@code true} if some index in the range holds different bytes;
 *         {@code false} if none does, if either array is null, if
 *         {@code len} is not positive, or if either array is shorter than
 *         {@code len}
 */
public static boolean strncmp(byte[] b1, int startIndex, byte[] b2, int len) {
    if (b1 == null || b2 == null || len <= 0) {
        return false;
    }
    if (b1.length < len || b2.length < len) {
        return false;
    }
    int idx = startIndex;
    while (idx < len) {
        if (b1[idx] != b2[idx]) {
            return true;
        }
        idx++;
    }
    return false;
}
/**
 * Converts a signed byte to its unsigned value in the range 0..255.
 *
 * @param b the byte to convert
 * @return {@code b} interpreted as an unsigned 8-bit quantity
 */
public static int getUnsigned(byte b) {
    // The previous expression "b & 0x7F + 128" only produced the right value
    // because '+' binds tighter than '&', making it "b & 255". The mask is
    // now written directly.
    return b & 0xFF;
}
/**
 * Copies the first {@code len} bytes of {@code src} into the start of
 * {@code dest}.
 *
 * Nothing is copied when either array is null or when {@code len} exceeds
 * the length of either array.
 *
 * @param dest destination array; may be null
 * @param src  source array; may be null
 * @param len  number of bytes to copy
 */
public static void strncpy(byte[] dest, byte[] src, int len) {
    if (dest == null || src == null) {
        return;
    }
    if (len > dest.length || len > src.length) {
        return;
    }
    // A non-positive length copies nothing.
    if (len > 0) {
        System.arraycopy(src, 0, dest, 0, len);
    }
}
/**
 * Computes the ID of a Chinese character in the 6768-entry code table,
 * using {@code (firstByte - 176) * 94 + (secondByte - 161)} on the first two
 * bytes of the GBK encoding of the string.
 *
 * The string is encoded explicitly as GBK (the encoding the inverse
 * operation in this file decodes with) instead of the platform default, so
 * the result does not depend on the JVM's locale settings. If GBK is not
 * available the platform default encoding is used as a fallback.
 *
 * @param str the text whose leading character is looked up; may be null
 * @return the computed ID, or -1 when {@code str} is null, empty, or encodes
 *         to fewer than two bytes (for example a single ASCII character)
 */
public static int CC_ID(String str) {
    if (str == null || str.length() == 0) {
        return -1;
    }
    byte[] b;
    try {
        b = str.getBytes("GBK");
    } catch (UnsupportedEncodingException e) {
        // Fall back to the legacy behaviour rather than failing outright.
        b = str.getBytes();
    }
    // A single-byte encoding has no second byte to read.
    if (b.length < 2) {
        return -1;
    }
    int high = b[0] & 0xFF;
    int low = b[1] & 0xFF;
    return (high - 176) * 94 + (low - 161);
}
/**
 * Computes the first byte value of a Chinese character from its ID.
 *
 * @param id the character ID
 * @return {@code id / 94 + 176}
 */
public static int CC_CHAR1(int id) {
    final int row = id / 94;
    return row + 176;
}
/**
 * Computes the second byte value of a Chinese character from its ID.
 *
 * @param id the character ID
 * @return {@code id % 94 + 161}
 */
public static int CC_CHAR2(int id) {
    final int column = id % 94;
    return column + 161;
}
/**
 * Appends the first {@code len} bytes of {@code src} to {@code dest},
 * starting at the first zero byte found in {@code dest}.
 *
 * @param dest destination buffer; its first zero byte marks the append
 *             position; may be null
 * @param src  bytes to append; may be null
 * @param len  number of bytes of {@code src} to append
 * @return the index in {@code dest} at which the appended bytes start, or -1
 *         when an argument is null, {@code len} is not positive or exceeds
 *         {@code src.length}, {@code dest} contains no zero byte, or the
 *         bytes do not fit after the append position ({@code dest} is left
 *         unmodified in all of these cases)
 */
public static int strcat(byte[] dest, byte[] src, int len) {
    if (dest == null || src == null || len <= 0 || len > src.length) {
        return -1;
    }
    for (int i = 0; i < dest.length; i++) {
        if (dest[i] == 0) {
            // Refuse to append rather than overrun the destination.
            if (len > dest.length - i) {
                return -1;
            }
            // Each source byte goes to its own slot (dest[i + j]); the
            // earlier loop wrote every byte to dest[i].
            System.arraycopy(src, 0, dest, i, len);
            return i;
        }
    }
    return -1;
}
/**
 * Copies all of {@code src} into the start of {@code dest}.
 *
 * @param dest destination array; may be null
 * @param src  source array; may be null
 * @return the number of bytes copied, or -1 when nothing was copied (null
 *         argument, empty source, or destination too small)
 */
public static int strcpy(byte[] dest, byte[] src) {
    // Guard here: reading src.length on a null source would throw before the
    // three-argument overload gets the chance to report -1.
    if (src == null) {
        return -1;
    }
    return strcpy(dest, src, src.length);
}

/**
 * Copies the first {@code len} bytes of {@code src} into the start of
 * {@code dest}.
 *
 * @param dest destination array; may be null
 * @param src  source array; may be null
 * @param len  number of bytes to copy
 * @return {@code len} when the copy was performed, or -1 when an argument is
 *         null, {@code len} is not positive, or {@code len} exceeds the
 *         length of either array ({@code dest} is left unmodified then)
 */
public static int strcpy(byte[] dest, byte[] src, int len) {
    if (dest == null || src == null || len <= 0) {
        return -1;
    }
    if (len > src.length || len > dest.length) {
        return -1;
    }
    System.arraycopy(src, 0, dest, 0, len);
    return len;
}
/**
 * Builds the GB character that corresponds to a code-table ID.
 *
 * The two byte values produced by {@code CC_CHAR1} and {@code CC_CHAR2} are
 * decoded as a GBK byte pair.
 *
 * @param id the character ID, expected in the range 0..6767
 * @return the decoded string, or {@code null} when {@code id} is outside
 *         that range or the GBK charset is not supported (in which case the
 *         stack trace is printed)
 */
public static String getGB(int id) {
    if (id < 0 || id >= 6768) {
        return null;
    }
    byte[] pair = { (byte) CC_CHAR1(id), (byte) CC_CHAR2(id) };
    String decoded = null;
    try {
        decoded = new String(pair, "GBK");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    return decoded;
}
/**
 * Tells whether a string encodes to exactly one byte in the platform
 * default charset.
 *
 * @param s the string to test; may be null
 * @return {@code true} if {@code s} is non-null and {@code s.getBytes()} has
 *         length 1
 */
public static boolean isSingle(String s) {
    return s != null && s.getBytes().length == 1;
}
/**
 * Returns a copy of the leading part of an int array, cut just before the
 * first zero found at an index other than 0.
 *
 * The element at index 0 is always kept, even when it is zero.
 *
 * @param src the array to trim; may be null
 * @return a new array holding the kept prefix, or {@code null} when
 *         {@code src} is null or empty
 */
public static int[] removeInvalid(int[] src) {
    if (src == null || src.length == 0) {
        return null;
    }
    // Index 0 is unconditionally part of the result; scan from index 1 for
    // the terminating zero.
    int keep = 1;
    while (keep < src.length && src[keep] != 0) {
        keep++;
    }
    int[] trimmed = new int[keep];
    System.arraycopy(src, 0, trimmed, 0, keep);
    return trimmed;
}
/**
 * Decides whether a string looks like a year expression.
 *
 * The string is accepted as soon as one of the patterns checked below
 * matches. The checks rely on {@code isAllSingleByte}, {@code isAllNum},
 * {@code getCharCount} and {@code GFString.cint}, which are defined outside
 * this method.
 *
 * @param snum the candidate string; may be null
 * @return {@code true} if one of the year patterns matches; {@code false}
 *         otherwise, including for null or empty input
 */
public static boolean isYearTime(String snum) {
    // An empty string has no first character: substring(0, 1) would throw.
    if (snum == null || snum.length() == 0) {
        return false;
    }
    int len = snum.length();
    String first = snum.substring(0, 1);
    // Years such as 1992, 98 or 06: all single-byte characters and either
    // four long, or two long with a leading value that is 0 or above 4.
    if (isAllSingleByte(snum)
            && (len == 4 || (len == 2 && (GFString.cint(first) > 4 || GFString.cint(first) == 0)))) {
        return true;
    }
    // Numeric strings of six or more characters, or of four characters whose
    // first character is one of "056789".
    if (isAllNum(snum) && (len >= 6 || (len == 4 && "056789".indexOf(first) != -1))) {
        return true;
    }
    // Two or more characters, with getCharCount over the Chinese numeral
    // list equal to the string length.
    if (getCharCount("零○一二三四五六七八九壹贰叁肆伍陆柒捌玖", snum) == len && len >= 2) {
        return true;
    }
    // Four characters with a count of two against the thousand/zero list,
    // e.g. the written form of the year 2002.
    if (len == 4 && getCharCount("千仟零○", snum) == 2) {
        return true;
    }
    // A single "thousand" character.
    if (len == 1 && getCharCount("千仟", snum) == 1) {
        return true;
    }
    // Two characters: a count of one against the first list for the whole
    // string and a count of one against the second list for the last
    // character.
    if (len == 2 && getCharCount("甲乙丙丁戊己庚辛壬癸", snum) == 1
            && getCharCount("子丑寅卯辰巳午未申酉戌亥", snum.substring(1)) == 1) {
        return true;
    }
    return false;
}
/**
 * Tells whether every character of a string occurs in a given set of
 * characters.
 *
 * @param aggr the set of allowed characters, given as a string; may be null
 * @param str  the string to check; may be null
 * @return {@code true} if both arguments are non-null and each character of
 *         {@code str} occurs in {@code aggr} (an empty {@code str} therefore
 *         yields {@code true}); {@code false} otherwise
 */
public static boolean isInAggregate(String aggr, String str) {
    if (aggr == null || str == null) {
        return false;
    }
    // Only the characters of str itself are checked. The earlier version
    // appended "1" to str first, which made the result false for every set
    // that does not contain '1'.
    for (int i = 0; i < str.length(); i++) {
        if (aggr.indexOf(str.charAt(i)) == -1) {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether a string consists only of half-width characters, judged by
 * each character encoding to exactly one byte in the platform default
 * charset.
 *
 * @param str the string to check; may be null
 * @return {@code true} if {@code str} is non-null and every character
 *         encodes to one byte; {@code false} otherwise
 */
public static boolean isDBCCase(String str) {
    if (str == null) {
        return false;
    }
    // One space is appended before the scan, so it is checked together with
    // the characters of str.
    char[] chars = (str + " ").toCharArray();
    for (char c : chars) {
        if (String.valueOf(c).getBytes().length != 1) {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether a string consists only of full-width characters, judged by
 * each character encoding to exactly two bytes in GBK.
 *
 * GBK is requested explicitly so that the result does not depend on the
 * platform default charset; if GBK is unavailable the platform default is
 * used as a fallback.
 *
 * @param str the string to check; may be null
 * @return {@code true} if {@code str} is non-null and every character
 *         encodes to two bytes (an empty string therefore yields
 *         {@code true}); {@code false} otherwise
 */
public static boolean isSBCCase(String str) {
    if (str == null) {
        return false;
    }
    // Only the characters of str are examined. The earlier version appended
    // a single-byte space first, which made the method return false for
    // every input.
    for (int i = 0; i < str.length(); i++) {
        String s = str.substring(i, i + 1);
        byte[] encoded;
        try {
            encoded = s.getBytes("GBK");
        } catch (UnsupportedEncodingException e) {
            encoded = s.getBytes();
        }
        if (encoded.length != 2) {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether a string is a single hyphen used as a delimiter.
 *
 * Both the ASCII hyphen-minus and the full-width hyphen-minus (U+FF0D) are
 * accepted.
 *
 * NOTE(review): the earlier code compared against the same "-" literal
 * twice; the second comparison is presumed to have been the full-width form
 * before it was lost to character normalization - confirm against the
 * upstream source.
 *
 * @param str the string to test; may be null
 * @return {@code true} if {@code str} equals one of the accepted hyphens
 */
public static boolean isDelimiter(String str) {
    return "-".equals(str) || "\uFF0D".equals(str);
}
/**
 * Tells whether a word starts with the unknown-word marker.
 *
 * @param word the word to test; may be null
 * @return {@code true} if {@code word} is non-null and begins with the
 *         marker literal checked below
 */
public static boolean isUnknownWord(String word) {
    return word != null && word.startsWith("未##");
}
/**
 * Splits a 2-4 character word into a surname part and a given-name part
 * using the dictionary obtained from the supplied tagger.
 *
 * The surname is taken as the first two characters if the dictionary knows
 * them (handle 1), otherwise the first character if known, otherwise it is
 * absent. The rest of the word is the given name. Several frequency-based
 * checks then reject words that do not look like a person name.
 *
 * NOTE(review): the meaning of the numeric handle arguments (1 and 2) passed
 * to the dictionary is not visible here - presumably they select the
 * surname and given-name entries; confirm against the Dictionary class.
 *
 * @param word         the candidate name; may be null
 * @param personTagger tagger supplying the dictionary and the given-name
 *                     test; may be null
 * @return a {@code PersonName} whose first name is set to the surname part
 *         and whose last name is set to the given-name part, or {@code null}
 *         when an argument is null, the word length is not 2..4, the word
 *         contains an atom that is neither CT_CHINESE nor CT_OTHER, no
 *         given-name part remains, or one of the rejection rules applies
 */
public static PersonName chineseNameSplit(String word, PosTagger personTagger) {
    if (word == null || personTagger == null) {
        return null;
    }
    Dictionary personDict = personTagger.getUnknownDict();
    int len = word.length();
    if (len < 2 || len > 4) {
        return null;
    }
    String[] atoms = GFString.atomSplit(word);
    for (String s : atoms) {
        if (Utility.charType(s) != Utility.CT_CHINESE && Utility.charType(s) != Utility.CT_OTHER) {
            return null;
        }
    }

    // Try a two-character surname first, then a one-character one. len is
    // at least 2 here, so both substrings are always in range.
    int surNameLen = 2;
    String surName = word.substring(0, surNameLen);
    if (!personDict.isExist(surName, 1)) {
        surNameLen = 1;
        surName = word.substring(0, surNameLen);
        if (!personDict.isExist(surName, 1)) {
            surName = null;
            surNameLen = 0;
        }
    }

    String giveName = word.substring(surNameLen);
    if (len > 3) {
        // For four-character words, a known entry right after the surname is
        // skipped and the given name starts one character later.
        String temp = word.substring(surNameLen, surNameLen + 1);
        if (personDict.isExist(temp, 1)) {
            giveName = word.substring(surNameLen + 1);
        }
    }
    // A two-character word that is itself a known surname leaves nothing for
    // the given name; reading its first character below would throw.
    if (giveName.length() == 0) {
        return null;
    }

    double freq = personDict.getFreq(surName, 1);
    String temp = giveName.substring(0, 1);
    double freq2 = personDict.getFreq(temp, 2);
    // Rejection rules; they apply only when no two-character surname was
    // found. Parentheses make the original precedence explicit.
    if (surNameLen != 2
            && ((surNameLen == 0 && len > 2)
                    || giveName.length() > 2
                    || (getForeignCharCount(word) >= 3
                            && freq < personDict.getFreq("张", 1) / 40
                            && freq2 < personDict.getFreq("华", 2) / 20)
                    || (freq < 10 && getForeignCharCount(giveName) == (len - surNameLen) / 2))) {
        return null;
    }
    if (len == 2 && personTagger.isGivenName(word)) {
        return null;
    }

    PersonName result = new PersonName();
    result.setFirstName(surName);
    result.setLastName(giveName);
    return result;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -