⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gfstring.java

📁 基于java语言的分词系统
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
			temp = src + "0";
			for (int i = 0, j = 0, k = 0; i < temp.length() - 2; i += 2, j++) {
				b[j] = Integer.parseInt(temp.substring(i, i + 2), 16);

				k = j % 7;
				srcAscii = (byte) (((b[j] << k) & 0x7F) | left);
				result += (char) srcAscii;
				left = (byte) (b[j] >>> (7 - k));
				if (k == 6) {
					result += (char) left;
					left = 0;
				}
				if (j == src.length() / 2)
					result += (char) left;
			}
		}
		return result;
	}

	/**
	 * Checks whether a string looks like a mobile phone number.
	 * <p>
	 * The string is accepted when all of the following hold:
	 * <ol>
	 * <li>it is exactly 11 characters long;</li>
	 * <li>{@code isNumeric} accepts it;</li>
	 * <li>it starts with "13" or "15".</li>
	 * </ol>
	 * No normalisation (full-width conversion, space removal) is applied here.
	 * 
	 * @param msg
	 *            the candidate string, may be {@code null}
	 * @return {@code true} if every condition above holds, {@code false}
	 *         otherwise (including for {@code null})
	 */
	public static boolean isMobileNo(String msg) {
		if (msg == null || msg.length() != 11) {
			return false;
		}
		if (!isNumeric(msg)) {
			return false;
		}
		// Length is already known to be 11, so a prefix test is safe.
		return msg.startsWith("13") || msg.startsWith("15");
	}

	/**
	 * Checks whether a string looks like a telephone number.
	 * <p>
	 * Pre-processing: every character rejected by {@code isNumeric} is
	 * replaced by a space, one trailing space is appended, and the result is
	 * passed through {@code removeSpace}.
	 * <p>
	 * The cleaned value is then accepted when it is numeric, 7 to 12
	 * characters long, and:
	 * <ul>
	 * <li>it starts with "0" and has at least 10 characters; or</li>
	 * <li>it does not start with "0" and is either a mobile number (see
	 * {@code isMobileNo}) or at most 8 characters long.</li>
	 * </ul>
	 * 
	 * @param msg
	 *            the candidate string, may be {@code null}
	 * @return {@code true} if the string is accepted as a telephone number
	 */
	public static boolean isTelNo(String msg) {
		if (msg == null || msg.length() < 7) {
			return false;
		}

		// Blank out everything that is not a digit, one character at a time.
		StringBuilder masked = new StringBuilder(msg.length() + 1);
		for (int pos = 0; pos < msg.length(); pos++) {
			String unit = msg.substring(pos, pos + 1);
			masked.append(isNumeric(unit) ? unit : " ");
		}
		masked.append(" ");

		msg = removeSpace(masked.toString());
		if (!isNumeric(msg) || msg.length() < 7 || msg.length() > 12) {
			return false;
		}

		if (msg.startsWith("0")) {
			// Leading zero: only the longer forms are accepted.
			return msg.length() >= 10;
		}
		return isMobileNo(msg) || msg.length() <= 8;
	}

	 
	 
	 
	/**
	 * Returns the nearest non-space character that precedes a given position.
	 * <p>
	 * Examples: in "2x" the character before index 1 is "2"; in "2 x" the
	 * character before index 2 is also "2", because the space is skipped.
	 * <p>
	 * An {@code index} beyond the end of the string is treated as the end of
	 * the string, so the search starts at the last character instead of
	 * throwing {@code StringIndexOutOfBoundsException}.
	 * 
	 * @param msg
	 *            the source string, may be {@code null}
	 * @param index
	 *            the position whose predecessor is wanted
	 * @return the one-character string found, or {@code null} when
	 *         {@code msg} is {@code null}, {@code index <= 0}, or only spaces
	 *         precede the position
	 */
	public static String getAnteriorNotSpaceChar(String msg, int index) {
		if (msg == null || index <= 0) {
			return null;
		}

		// Clamp so that an over-large index cannot run past the string.
		int start = Math.min(index, msg.length()) - 1;
		for (int i = start; i >= 0; i--) {
			char c = msg.charAt(i);
			if (c != ' ') {
				return String.valueOf(c);
			}
		}

		return null;
	}

	/**
	 * Orders strings by their length.
	 * <p>
	 * Each element is first passed through {@code GFString.removeSpace};
	 * results that are {@code null} or shorter than two characters are
	 * dropped. The remaining strings are placed by insertion: a new string
	 * goes in front of the first already-placed string that is not longer
	 * than it, so among strings of equal length the one seen later comes
	 * first in the long-to-short order.
	 * 
	 * @param list
	 *            the strings to order, may be {@code null}
	 * @param long2short
	 *            {@code true}: longest first; {@code false}: shortest first
	 *            (the exact reverse of the longest-first order)
	 * @return a new list, or {@code null} when {@code list} is {@code null}
	 */
	public static ArrayList<String> sortByLen(ArrayList<String> list, boolean long2short) {
		if (list == null) {
			return null;
		}

		ArrayList<String> ordered = new ArrayList<String>(list.size());
		for (String raw : list) {
			String name = GFString.removeSpace(raw);
			if (name == null || name.length() <= 1) {
				continue;
			}

			// Walk past every entry that is strictly longer than the new one.
			int slot = 0;
			while (slot < ordered.size() && name.length() < ordered.get(slot).length()) {
				slot++;
			}
			ordered.add(slot, name);
		}

		if (long2short) {
			return ordered;
		}

		// Shortest-first is simply the mirrored list.
		ArrayList<String> mirrored = new ArrayList<String>();
		for (int i = ordered.size() - 1; i >= 0; i--) {
			mirrored.add(ordered.get(i));
		}
		return mirrored;
	}

	/**
	 * Replaces {@code len} characters of {@code src}, starting at
	 * {@code index}, with {@code newstr}.
	 * <p>
	 * If the range reaches or passes the end of {@code src}, everything from
	 * {@code index} onward is replaced. A negative {@code len} is treated as
	 * zero (pure insertion). The range check is written so that a very large
	 * {@code len} cannot overflow {@code int}.
	 * 
	 * @param src
	 *            the source string, may be {@code null}
	 * @param index
	 *            start position of the range to replace
	 * @param len
	 *            number of characters to replace
	 * @param newstr
	 *            the replacement text; {@code null} is treated as ""
	 * @return the resulting string, or {@code src} unchanged when it is
	 *         {@code null} or {@code index} is outside
	 *         {@code [0, src.length())}
	 */
	public static String replace(String src, int index, int len, String newstr) {
		if (src == null || index < 0 || index >= src.length()) {
			return src;
		}

		String insert = (newstr == null) ? "" : newstr;
		int span = Math.max(len, 0);
		String head = src.substring(0, index);

		// Compare against the remaining length instead of computing
		// index + len, which overflows for large len values.
		if (span >= src.length() - index) {
			return head + insert;
		}
		return head + insert + src.substring(index + span);
	}

	/**
	 * Tells whether the encoded form of a string contains a zero byte.
	 * <p>
	 * The string is encoded with {@code String.getBytes()}, i.e. with the
	 * platform default charset, and the resulting bytes are scanned.
	 * 
	 * @param msg
	 *            the string to inspect, may be {@code null}
	 * @return {@code true} if any encoded byte equals 0; {@code false}
	 *         otherwise, including for {@code null}
	 */
	public static boolean hasZero(String msg) {
		if (msg == null) {
			return false;
		}

		byte[] encoded = msg.getBytes();
		int i = 0;
		while (i < encoded.length) {
			if (encoded[i] == 0) {
				return true;
			}
			i++;
		}
		return false;
	}

	/**
	 * Tells whether a string consists only of ASCII letters and digits.
	 * <p>
	 * The check runs over the bytes returned by {@code String.getBytes()}
	 * (platform default charset): every byte must lie in '0'-'9', 'A'-'Z' or
	 * 'a'-'z'. An empty string has no offending byte and is accepted.
	 * 
	 * @param str
	 *            the string to test, may be {@code null}
	 * @return {@code true} if every byte is an ASCII letter or digit;
	 *         {@code false} otherwise, including for {@code null}
	 */
	public static boolean isAlphanumeric(String str) {
		if (str == null) {
			return false;
		}

		byte[] raw = str.getBytes();
		for (int i = 0; i < raw.length; i++) {
			byte b = raw[i];
			boolean digit = b >= 48 && b <= 57; // '0'..'9'
			boolean upper = b >= 65 && b <= 90; // 'A'..'Z'
			boolean lower = b >= 97 && b <= 122; // 'a'..'z'
			if (!(digit || upper || lower)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Strips a trailing administrative suffix from a place name.
	 * <p>
	 * The recognised suffixes are the single characters in the
	 * {@code suffix} array below (province, city, district, county,
	 * township, town, village). Only names longer than two characters are
	 * shortened, and at most one suffix is removed.
	 * <p>
	 * The test uses {@code endsWith}, so a name that also contains the suffix
	 * character earlier on (for example 沙市市) is handled correctly.
	 * 
	 * @param placename
	 *            the place name, may be {@code null}
	 * @return the name without its suffix, or the argument unchanged when no
	 *         suffix applies
	 */
	public static String removePlacenameSuffix(String placename) {
		String[] suffix = { "省", "市", "区", "县", "乡", "镇", "村" };
		if (placename == null || placename.length() <= 2) {
			return placename;
		}

		for (String s : suffix) {
			if (placename.endsWith(s)) {
				return placename.substring(0, placename.length() - s.length());
			}
		}
		return placename;
	}

	/**
	 * Appends an administrative suffix to a place name unless the name
	 * already ends with it.
	 * <p>
	 * Nothing is appended when {@code placename} is {@code null} or shorter
	 * than two characters, or when {@code suffix} is {@code null} or not
	 * exactly one character long.
	 * <p>
	 * The test uses {@code endsWith}, so a name that contains the suffix
	 * character earlier on and already ends with it (for example 沙市市 with
	 * suffix 市) is not given a second suffix.
	 * 
	 * @param placename
	 *            the place name, may be {@code null}
	 * @param suffix
	 *            the one-character suffix to ensure, e.g. 省, 市, 区 or 县
	 * @return the name, guaranteed to end with {@code suffix} when the
	 *         preconditions above are met; otherwise the argument unchanged
	 */
	public static String addPlacenameSuffix(String placename, String suffix) {
		boolean nameUsable = placename != null && placename.length() > 1;
		boolean suffixUsable = suffix != null && suffix.length() == 1;

		if (nameUsable && suffixUsable && !placename.endsWith(suffix)) {
			placename += suffix;
		}

		return placename;
	}

	/**
	 * Tells whether {@code str1} sorts strictly before {@code str2}, for
	 * example "abc" comes before "adc".
	 * <p>
	 * Both strings are encoded with {@code String.getBytes()} (platform
	 * default charset) and the first {@code min(str1.length(),
	 * str2.length())} bytes are compared pairwise as signed values. The first
	 * differing pair decides the result. If no pair differs within that
	 * range, the result is {@code false}, so a string is not reported as
	 * "before" a longer string that merely extends it.
	 * 
	 * @param str1
	 *            the first string, may be {@code null}
	 * @param str2
	 *            the second string, may be {@code null}
	 * @return {@code true} if the first differing byte of {@code str1} is
	 *         smaller than that of {@code str2}; {@code false} otherwise,
	 *         including when either argument is {@code null}
	 */
	public static boolean isBefore(String str1, String str2) {
		if (str1 == null || str2 == null) {
			return false;
		}

		int limit = Math.min(str1.length(), str2.length());
		byte[] left = str1.getBytes();
		byte[] right = str2.getBytes();

		for (int pos = 0; pos < limit; pos++) {
			if (left[pos] != right[pos]) {
				return right[pos] > left[pos];
			}
		}
		return false;
	}

 

	/**
	 * Tells whether a SIM number carries one of the prefixes this class
	 * treats as China Unicom.
	 * <p>
	 * The number must be exactly 11 characters long and start with one of
	 * 130, 131, 132, 133, 153 or 156. The remaining characters are not
	 * inspected.
	 * 
	 * @param sim
	 *            the number to test, may be {@code null}
	 * @return {@code true} if the length and prefix match
	 */
	public static boolean isUnicommMobile(String sim) {
		if (sim == null || sim.length() != 11) {
			return false;
		}

		String[] prefixes = { "130", "131", "132", "133", "153", "156" };
		String head = sim.substring(0, 3);
		for (String prefix : prefixes) {
			if (prefix.equals(head)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Tells whether a SIM number carries one of the prefixes this class
	 * treats as China Mobile.
	 * <p>
	 * The number must be exactly 11 characters long and start with one of
	 * 134, 135, 136, 137, 138, 139, 158 or 159. The remaining characters are
	 * not inspected.
	 * 
	 * @param sim
	 *            the number to test, may be {@code null}
	 * @return {@code true} if the length and prefix match
	 */
	public static boolean isChinaMobile(String sim) {
		if (sim == null || sim.length() != 11) {
			return false;
		}

		String[] prefixes = { "134", "135", "136", "137", "138", "139", "159", "158" };
		String head = sim.substring(0, 3);
		for (String prefix : prefixes) {
			if (prefix.equals(head)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Returns the single character that immediately follows a position.
	 * <p>
	 * A negative {@code index} means "before the start", so the first
	 * character is returned. An {@code index} at or beyond the last character
	 * has no successor and yields {@code null} (this includes values past the
	 * end of the string, which are rejected instead of causing a
	 * {@code StringIndexOutOfBoundsException}).
	 * 
	 * @param str
	 *            the source string, may be {@code null}
	 * @param index
	 *            the position whose successor is wanted
	 * @return a one-character string, or {@code null} when {@code str} is
	 *         {@code null} or empty, or no character follows {@code index}
	 */
	public static String getNextString(String str, int index) {
		if (str == null || str.length() == 0) {
			return null;
		}
		if (index < 0) {
			return str.substring(0, 1);
		}
		if (index >= str.length() - 1) {
			// Last character or beyond: nothing follows.
			return null;
		}
		return str.substring(index + 1, index + 2);
	}

	/**
	 * Splits a string into its individual characters, one per array element.
	 * For example "ab101" becomes {"a", "b", "1", "0", "1"}.
	 * <p>
	 * Splitting is done per {@code char}, i.e. per UTF-16 code unit.
	 * 
	 * @param str
	 *            the string to split, may be {@code null}
	 * @return an array with {@code str.length()} one-character strings, or
	 *         {@code null} when {@code str} is {@code null}
	 */
	public static String[] atomSplit(String str) {
		if (str == null) {
			return null;
		}

		int count = str.length();
		String[] atoms = new String[count];
		for (int pos = 0; pos < count; pos++) {
			atoms[pos] = String.valueOf(str.charAt(pos));
		}
		return atoms;
	}

	/**
	 * Tells whether a string contains something that looks like a phone
	 * number.
	 * <p>
	 * The string is converted with {@code quan2banGBK} and split into single
	 * characters. Leading characters that cannot belong to a number are
	 * skipped; then the first run made of digits and the characters
	 * {@code - / ( )} is collected, stopping at the first other character.
	 * The run counts as a phone number when it has at least 7 characters and
	 * is accepted by {@code isMobileNo} or {@code isTelNo}.
	 * 
	 * @param str
	 *            the text to scan, may be {@code null}
	 * @return {@code true} if the first such run is accepted as a mobile or
	 *         telephone number
	 */
	public static boolean hasTelNo(String str) {
		if (str == null || str.length() < 7) {
			return false;
		}

		StringBuilder run = new StringBuilder();
		for (String atom : atomSplit(quan2banGBK(str))) {
			boolean partOfNumber = "-".equals(atom) || "/".equals(atom) || "(".equals(atom) || ")".equals(atom)
					|| isNumeric(atom);
			if (partOfNumber) {
				run.append(atom);
			} else if (run.length() > 0) {
				// The first candidate run has ended; later text is ignored.
				break;
			}
		}

		if (run.length() < 7) {
			return false;
		}
		String candidate = run.toString();
		return isMobileNo(candidate) || isTelNo(candidate);
	}

	/**
	 * Finds the position of a part-of-speech tag inside a segmented string.
	 * <p>
	 * An occurrence of {@code pos} only counts when it is a complete tag,
	 * i.e. it is either at the very end of {@code str} or directly followed
	 * by a space. The first such occurrence wins.
	 * 
	 * @param str
	 *            the segmented string to search, may be {@code null}
	 * @param pos
	 *            the tag to look for, may be {@code null}
	 * @return the index of the first complete occurrence, or -1 when there is
	 *         none or either argument is {@code null}
	 */
	public static int findPos(String str, String pos) {
		if (str == null || pos == null) {
			return -1;
		}

		int from = 0;
		while (from < str.length()) {
			int hit = str.indexOf(pos, from);
			if (hit < 0) {
				// No further occurrence, so nothing can match any more.
				break;
			}

			int end = hit + pos.length();
			if (end == str.length() || str.startsWith(" ", end)) {
				return hit;
			}
			// Resume right after the rejected occurrence.
			from = hit + 1;
		}
		return -1;
	}

	/**
	 * Extracts the keyword from a tagged token by cutting it at the first
	 * slash. For example "word/bs /sh" yields "word".
	 * 
	 * @param str
	 *            a keyword followed by one or more "/tag" markers, may be
	 *            {@code null}
	 * @return the text before the first "/", or {@code null} when
	 *         {@code str} is {@code null}, contains no "/", or starts with
	 *         "/"
	 */
	public static String getPOSKey(String str) {
		if (str == null) {
			return null;
		}
		int slash = str.indexOf("/");
		return slash > 0 ? str.substring(0, slash) : null;
	}

	/**
	 * Splits a tagged string on spaces while keeping each keyword together
	 * with all of its part-of-speech tags.
	 * 
	 * <pre>
	 *  Input:  "ab/bs /sh to cd/bs /cm"
	 *  Output: "ab/bs /sh"
	 *          "to"
	 *          "cd/bs /cm"
	 * </pre>
	 * 
	 * A token that begins with "/" is glued (with a single space) onto the
	 * previous entry; if there is no previous entry it starts one itself.
	 * 
	 * @param str
	 *            the tagged string, may be {@code null}
	 * @return the grouped tokens, or {@code null} when {@code str} is
	 *         {@code null}
	 */
	public static String[] splitByPOS(String str) {
		if (str == null) {
			return null;
		}

		ArrayList<String> groups = new ArrayList<String>();
		for (String token : str.split(" ")) {
			boolean extraTag = token.indexOf("/") == 0;
			if (extraTag && !groups.isEmpty()) {
				int last = groups.size() - 1;
				groups.set(last, groups.get(last) + " " + token);
			} else {
				groups.add(token);
			}
		}

		return groups.toArray(new String[groups.size()]);
	}

	/**
	 * 得到一个汉字串对应的拼音.只把串的汉字进行转换,其它字符保持不变
	 * 
	 * @param cstr
	 * @return
	 */
	public static String getBopomofo(String cstr) {
		String bopomofo = null;

		if (cstr != null) {
			LinkedHashMap<String, Integer> bopoMap = new LinkedHashMap<String, Integer>();
			bopoMap.put("a", 1);
			bopoMap.put("a", -20319);
			bopoMap.put("ai", -20317);
			bopoMap.put("an", -20304);
			bopoMap.put("ang", -20295);
			bopoMap.put("ao", -20292);
			bopoMap.put("ba", -20283);
			bopoMap.put("bai", -20265);
			bopoMap.put("ban", -20257);
			bopoMap.put("bang", -20242);
			bopoMap.put("bao", -20230);
			bopoMap.put("bei", -20051);
			bopoMap.put("ben", -20036);
			bopoMap.put("beng", -20032);
			bopoMap.put("bi", -20026);
			bopoMap.put("bian", -20002);
			bopoMap.put("biao", -19990);
			bopoMap.put("bie", -19986);
			bopoMap.put("bin", -19982);
			bopoMap.put("bing", -19976);
			bopoMap.put("bo", -19805);
			bopoMap.put("bu", -19784);
			bopoMap.put("ca", -19775);
			bopoMap.put("cai", -19774);
			bopoMap.put("can", -19763);
			bopoMap.put("cang", -19756);
			bopoMap.put("cao", -19751);
			bopoMap.put("ce", -19746);
			bopoMap.put("ceng", -19741);
			bopoMap.put("cha", -19739);
			bopoMap.put("chai", -19728);
			bopoMap.put("chan", -19725);
			bopoMap.put("chang", -19715);
			bopoMap.put("chao", -19540);
			bopoMap.put("che", -19531);
			bopoMap.put("chen", -19525);
			bopoMap.put("cheng", -19515);
			bopoMap.put("chi", -19500);
			bopoMap.put("chong", -19484);
			bopoMap.put("chou", -19479);
			bopoMap.put("chu", -19467);
			bopoMap.put("chuai", -19289);
			bopoMap.put("chuan", -19288);
			bopoMap.put("chuang", -19281);
			bopoMap.put("chui", -19275);
			bopoMap.put("chun", -19270);
			bopoMap.put("chuo", -19263);
			bopoMap.put("ci", -19261);
			bopoMap.put("cong", -19249);
			bopoMap.put("cou", -19243);
			bopoMap.put("cu", -19242);
			bopoMap.put("cuan", -19238);
			bopoMap.put("cui", -19235);
			bopoMap.put("cun", -19227);
			bopoMap.put("cuo", -19224);
			bopoMap.put("da", -19218);
			bopoMap.put("dai", -19212);
			bopoMap.put("dan", -19038);
			bopoMap.put("dang", -19023);
			bopoMap.put("dao", -19018);
			bopoMap.put("de", -19006);
			bopoMap.put("deng", -19003);
			bopoMap.put("di", -18996);
			bopoMap.put("dian", -18977);
			bopoMap.put("diao", -18961);
			bopoMap.put("die", -18952);
			bopoMap.put("ding", -18783);
			bopoMap.put("diu", -18774);
			bopoMap.put("dong", -18773);
			bopoMap.put("dou", -18763);
			bopoMap.put("du", -18756);
			bopoMap.put("duan", -18741);
			bopoMap.put("dui", -18735);
			bopoMap.put("dun", -18731);
			bopoMap.put("duo", -18722);
			bopoMap.put("e", -18710);
			bopoMap.put("en", -18697);
			bopoMap.put("er", -18696);
			bopoMap.put("fa", -18526);
			bopoMap.put("fan", -18518);
			bopoMap.put("fang", -18501);
			bopoMap.put("fei", -18490);
			bopoMap.put("fen", -18478);
			bopoMap.put("feng", -18463);
			bopoMap.put("fo", -18448);
			bopoMap.put("fou", -18447);
			bopoMap.put("fu", -18446);
			bopoMap.put("ga", -18239);
			bopoMap.put("gai", -18237);
			bopoMap.put("gan", -18231);
			bopoMap.put("gang", -18220);
			bopoMap.put("gao", -18211);
			bopoMap.put("ge", -18201);
			bopoMap.put("gei", -18184);
			bopoMap.put("gen", -18183);
			bopoMap.put("geng", -18181);
			bopoMap.put("gong", -18012);
			bopoMap.put("gou", -17997);
			bopoMap.put("gu", -17988);
			bopoMap.put("gua", -17970);
			bopoMap.put("guai", -17964);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -