📄 matchstring.htm
字号:
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link rel="stylesheet" href="css/stdlayout.css" type="text/css">
<link rel="stylesheet" href="css/print.css" type="text/css">
<meta content="text/html; charset=gb2312" http-equiv="content-type">
<title>字串核对</title>
</head>
<body>
<h3><a href="http://caterpillar.onlyfun.net/GossipCN/index.html">From
Gossip@caterpillar</a></h3>
<h1><a href="AlgorithmGossip.htm">Algorithm Gossip: 字串核对</a></h1>
<h2> 说明</h2>
今日的一些高阶程式语言对于字串的处理支援越来越强大(例如Java、Perl等),不过字串搜寻本身仍是个值得探讨的课题,在这边以Boyer- Moore法来说明如何进行字串说明,这个方法快且原理简洁易懂。<br>
<h2>解法</h2>
字串搜寻本身不难,使用暴力法也可以求解,但如何快速搜寻字串就不简单了,传统的字串搜寻是从关键字与字串的开头开始比对,例如 <a href="http://www.ics.uci.edu/%7Eeppstein/161/960227.html">Knuth-Morris-Pratt 演算法</a> 字串搜寻,这个方法也不错,不过要花时间在公式计算上;Boyer-Moore字串核对改由关键字的后面开始核对字串,并制作前进表,如果比对不符合则依前进表中的值前进至下一个核对处,假设是p好了,然后比对字串中p-n+1至p的值是否与关键字相同。 <br>
<div style="text-align: center;"><img style="width: 271px; height: 87px;" alt="字串核对" title="字串核对" src="images/matchString-1.jpg"><br>
<br>
<div style="text-align: left;">那么前进表该如何前进,举个实际的例子,如果要在字串中搜寻JUST这个字串,则可能遇到的几个情况如下所示:<br>
<div style="text-align: center;"><img style="width: 364px; height: 367px;" alt="字串核对" title="字串核对" src="images/matchString-2.jpg"><br>
<br>
<br>
</div>
依照这个例子,可以决定出我们的前进值表如下: <br>
<table style="text-align: left; height: 102px; width: 313px;" border="1" cellpadding="2" cellspacing="2">
<tbody>
<tr>
<td>其它</td>
<td>J</td>
<td>U</td>
<td>S</td>
<td>T</td>
</tr>
<tr>
<td>4</td>
<td>3</td>
<td>2</td>
<td>1</td>
<td>4(match?)</td>
</tr>
</tbody>
</table>
<br>
如果关键字中有重复出现的字元,则前进值就会有两个以上的值,此时则取前进值较小的值,如此就不会跳过可能的位置,例如texture这个关键字,t的前
进值应该取后面的3而不是取前面的7。<br>
<h2> 实作</h2>
<ul>
<li> C
</li>
</ul>
<pre>#include <stdio.h> <br>#include <stdlib.h> <br>#include <string.h> <br><br>void table(char*); // 建立前进表 <br>int search(int, char*, char*); // 搜寻关键字 <br>void substring(char*, char*, int, int); // 取出子字串 <br><br>int skip[256]; <br><br>int main(void) { <br> char str_input[80]; <br> char str_key[80]; <br> char tmp[80] = {'\0'}; <br> int m, n, p; <br><br> printf("请输入字串:"); <br> gets(str_input); <br> printf("请输入搜寻关键字:"); <br> gets(str_key); <br><br> m = strlen(str_input); // 计算字串长度 <br> n = strlen(str_key); <br> table(str_key); <br> p = search(n-1, str_input, str_key); <br><br> while(p != -1) { <br> substring(str_input, tmp, p, m); <br> printf("%s\n", tmp); <br> p = search(p+n+1, str_input, str_key); <br> } <br><br> printf("\n"); <br><br> return 0; <br>} <br><br>void table(char *key) { <br> int k, n; <br><br> n = strlen(key); <br><br> for(k = 0; k <= 255; k++) <br> skip[k] = n; <br> for(k = 0; k < n - 1; k++) <br> skip[key[k]] = n - k - 1; <br>} <br><br>int search(int p, char* input, char* key) { <br> int i, m, n; <br> char tmp[80] = {'\0'}; <br><br> m = strlen(input); <br> n = strlen(key); <br><br> while(p < m) { <br> substring(input, tmp, p-n+1, p); <br> <br> if(!strcmp(tmp, key)) // 比较两字串是否相同 <br> return p-n+1; <br> p += skip[input[p]]; <br> } <br><br> return -1; <br>} <br><br>void substring(char *text, char* tmp, int s, int e) { <br> int i, j; <br><br> for(i = s, j = 0; i <= e; i++, j++) <br> tmp[j] = text[i]; <br><br> tmp[j] = '\0'; <br>} <br></pre>
<br>
<ul>
<li> Java
</li>
</ul>
<pre>import java.io.BufferedReader;<br>import java.io.IOException;<br>import java.io.InputStreamReader;<br><br>public class StringMatch {<br> private int[] skip;<br> private int p;<br> private String str;<br> private String key;<br> <br> public StringMatch(String key) {<br> skip = new int[256];<br> this.key = key;<br> <br> for(int k = 0; k <= 255; k++) <br> skip[k] = key.length(); <br> for(int k = 0; k < key.length() - 1; k++) <br> skip[key.charAt(k)] = key.length() - k - 1; <br> }<br> <br> public void search(String str) {<br> this.str = str;<br> p = search(key.length()-1, str, key);<br> }<br> <br> private int search(int p, String input, String key) { <br> while(p < input.length()) { <br> String tmp = input.substring(<br> p-key.length()+1, p+1); <br><br> if(tmp.equals(key)) // 比较两字串是否相同 <br> return p-key.length()+1; <br> p += skip[input.charAt(p)]; <br> } <br><br> return -1; <br> }<br> <br> public boolean hasNext() {<br> return (p != -1);<br> }<br> <br> public String next() {<br> String tmp = str.substring(p);<br> p = search(p+key.length()+1, str, key);<br> return tmp;<br> }<br> <br> public static void main(String[] args) <br> throws IOException {<br> BufferedReader bufReader = <br> new BufferedReader(<br> new InputStreamReader(System.in));<br> <br> System.out.print("请输入字串:");<br> String str = bufReader.readLine();<br> System.out.print("请输入搜寻关键字:"); <br> String key = bufReader.readLine();<br> <br> StringMatch strMatch = new StringMatch(key);<br> strMatch.search(str);<br><br> while(strMatch.hasNext()) { <br> System.out.println(strMatch.next()); <br> } <br> }<br>} </pre>
<br>
<br>
</div>
</div>
</body>
</html>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -