⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmp.cc

📁 早期freebsd实现
💻 CC
字号:
//From: "Douglas C. Schmidt" <schmidt@zola.ICS.UCI.EDU>//Date: Fri, 28 Jul 89 11:47:11 -0700/* Nifty little program that illustrates an implementation of the Knuth,    Morris, Pratt string matching algorithm.   This program has a user interface similar to fgrep, i.e., when   you provide it with a fixed pattern it prints out all the lines   from the standard input (or one user-specified input file) that    contain at least one substring that matches the provided pattern.       Relevant options are:      -i: Ignore case when performing comparisons.    -n: Print out lines numbers along with the matching lines.   -v: Print out lines that *don't* match the pattern.   The code below is extensively commented.  If these comments   distract you, run the code through g++ -E first ;-). */   #include <stdio.h>#include <std.h>#include <GetOpt.h>/* This array is designed for mapping upper and lower case letter   together for a case independent comparison.  The mappings are   based upon the ASCII character sequences. */char charmap[] = {	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',	'\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347',	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',	'\370', '\371', '\372', '\333', '\334', '\335', '\336', '\337',	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',};/* The list of available program options. */enum options{  DEBUG, FOLD_CASE, LINE_NUMBERS, INVERSE, OPTION_SIZE};/* Vector storing the enabled options (all initially disabled). */char option[OPTION_SIZE];/* Data and function members necessary to implement the KMP algorithm. */class kmp{private: #ifdef GNUG  const int RESET_PATTERN_FLAG = -1;#else#define RESET_PATTERN_FLAG -1#endif    int  *fail_trans_table; /* Stores the generated nfa used when matching. */  char *pattern;          /* Stores the user-supplied pattern. */  int   pattern_len;      /* Pattern length. */  void print_fail_trans_table (void);public:  kmp (char *pattern, int pattern_len, int debug = 0); ~kmp () { delete fail_trans_table; }  int operator ()(char *text, int text_len);};/* Provide a debugging dump of the failure function.  Nothing fancy... */voidkmp::print_fail_trans_table (void){  int i;    for (i = 0; i < pattern_len; i++)    printf ("%3d,", i);  putchar ('\n');  for (i = 0; i < pattern_len; i++)    printf ("%3d,", fail_trans_table [i]);  putchar ('\n');}/* This constructor builds the transition table that encodes the failure function    automaton.  The table stores the PATTERN index we go to in the event of a    mismatch with the input text string.  This generation process runs in time    linear to the pattern size.       The terms SUFFIX and PREFIX used below are defined in terms of each other.     That is, at each state of the failure function we seek to determine the largest    pattern prefix that matches with the largest suffix in the actual input text.     This information informs us how far we can legitimately shift the pattern to the    right when a mismatch occurs during the string matching process.       Stated more formally, this means that for all i (0 <= i <= (PATTERN_LEN - 1))   FAIL_TRANS_TABLE (i) is the largest j < i, such that PATTERN[1..j - 1]    is a suffix of PATTERN[1..i - 1] and PATTERN[i] != PATTERN[j]. */kmp::kmp (char *pat, int len, int debug):  pattern (pat), fail_trans_table (new int[len]), pattern_len (len){  /* Points 1 beyond the rightmost potential *suffix* in the pattern (which is      actually simulating the behavior of an actual input text string. */  int suffix_end = 0;  /* Points 1 beyond the rightmost potential *prefix* in the pattern. */  int prefix_end = fail_trans_table[suffix_end] = RESET_PATTERN_FLAG;    do    {      /* This WHILE loop uses the precomputed failure function to look further          left into the pattern trying to find an index location where the pattern          prefix matches the pattern suffix.  However, if/when we locate the         RESET_PATTERN_FLAG this means that we can just skip ahead to the next          character in the input text. */               while (prefix_end != RESET_PATTERN_FLAG              && pattern[suffix_end] != pattern[prefix_end])        prefix_end = fail_trans_table[prefix_end];      /* Once SUFFIX_END and PREFIX_END are pre-incremented below we know that          the first PREFIX_END characters of PATTERN match the characters in          positions PATTERN[SUFFIX_END - PREFIX_END .. SUFFIX_END - 1], i.e.,         the last PREFIX_END characters in the rightmost part of the first          SUFFIX_END - 1 characters in PATTERN.                 If the character at location PATTERN[SUFFIX_END] matches that at          PATTERN[PREFIX_END] it is silly to have the failure transition         jump to that pattern location (since it would immediately fail to         match, of course!). Instead, in that case we just ``borrow'' the         previously computed transition stored at FAIL_TRANS_TABLE[PREFIX_END]         and use it. */      if (pattern[++suffix_end] == pattern[++prefix_end])        fail_trans_table[suffix_end] = fail_trans_table[prefix_end];      else        fail_trans_table[suffix_end] = prefix_end;    }  /* Adding the extra 1 here is necessary since C strings are     indexed from 0 to pattern_len - 1... */  while (suffix_end + 1 < pattern_len);  if (debug)    print_fail_trans_table ();}/* Actually perform the KMP matching algorithm using the generated determinisitic    pattern matching match encoded in the failure transition table.  This version    is optimized on the assumption that there will be more characters in the text    input that *don't* match the pattern than ones that do match it. Therefore, we    make a special effort to keep looping through the failure function as long as    the text and pattern don't match. */intkmp::operator ()(char *text, int text_len){  int suffix_end = RESET_PATTERN_FLAG;  int prefix_end = RESET_PATTERN_FLAG;  /* If TEXT length is shorted than PATTERN we'll bail out now... */  if (text_len < pattern_len)    return 0;      /* Split the following two cases so that we don't pay any     unnecessary overhead when case folding is not required. */  if (option[FOLD_CASE])    /* Note how this main loop is almost identical with the      `failure transition table building' algorithm used above. */    do      {        /* Ignore case when matching and keep applying the failure function           until we get a match or are forced to restart the pattern (starting           with the *following* input text character, that is). */        while (prefix_end != RESET_PATTERN_FLAG                && charmap[text[suffix_end]] != charmap[pattern[prefix_end]])          prefix_end = fail_trans_table[prefix_end];        if (prefix_end + 1 >= pattern_len)          return 1;        else          ++suffix_end, ++prefix_end;      }    /* This conditional expression is used to terminate the search when it       becomes clear that we can't match the PATTERN since the TEXT has       fewer unexamined characters than the PATTERN length. */    while (text_len - suffix_end >= pattern_len - prefix_end);  else    /* This loop is identical with the preceding one, except that we don't       bother to fold case... */    do      {        while (prefix_end != RESET_PATTERN_FLAG                && text[suffix_end] != pattern[prefix_end])          prefix_end = fail_trans_table[prefix_end];        if (prefix_end + 1 >= pattern_len)          return 1;        else          ++suffix_end, ++prefix_end;      }    while (text_len - suffix_end >= pattern_len - prefix_end);  return 0;}/* The name sez it all! */voidprint_usage_and_die (char *prog_name){  fprintf (stderr, "usage: %s [-inv] pattern [file]\n", prog_name);  exit (1);}/* Main driver program.  Emulates certain useful features of fgrep. */   intmain (int argc, char **argv){  GetOpt getopt(argc, argv, "dinv");  if (argc == 1)    print_usage_and_die (argv[0]);  /* A rather arbitrary limit... */  const int MAX_LINE = 200;  char  text[MAX_LINE];  int   c;        /* Initialize any user-specified options. */  while ((c = getopt ()) != EOF)    switch (c)      {      case 'd': option[DEBUG] = 1; break;      case 'i': option[FOLD_CASE] = 1; break;      case 'n': option[LINE_NUMBERS] = 1; break;      case 'v': option[INVERSE] = 1; break;      default:  print_usage_and_die (argv[0]);      }        /* Call the constructor to build the failure function table. */  kmp match (argv[getopt.optind], strlen (argv[getopt.optind]), option[DEBUG]);  /* Handle a user-specified input file. */  if (argv[++getopt.optind] && !freopen (argv[getopt.optind], "r", stdin))    {      perror (argv[0]); return 1;    }    /* Split the following into two separate cases to avoid overhead      when line numbers are not required. */  if (option[LINE_NUMBERS])    {      int line;            for (line = 1; gets (text); line++)        if (match (text, strlen (text)) != option[INVERSE])          printf ("%d:%s\n", line, text);    }  else    {      while (gets (text))        if (match (text, strlen (text)) != option[INVERSE])          puts (text);    }  return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -