⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 suggest.cpp

📁 unix/linux下拼写检查程序源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// Copyright 2000 by Kevin Atkinson under the terms of the LGPL// suggest.cc Suggestion code for Aspell// The magic behind my spell checker comes from merging Lawrence// Philips excellent metaphone algorithm and Ispell's near miss// strategy which is inserting a space or hyphen, interchanging two// adjacent letters, changing one letter, deleting a letter, or adding// a letter.// // The process goes something like this.// // 1.     Convert the misspelled word to its soundslike equivalent (its//        metaphone for English words).// // 2.     Find words that have the same soundslike pattern.//// 3.     Find words that have similar soundslike patterns. A similar//        soundlike pattern is a pattern that is obtained by//        interchanging two adjacent letters, changing one letter,//        deleting a letter, or adding a letter.//// 4.     Score the result list and return the words with the lowest//        score. The score is roughly the weighed average of the edit//        distance of the word to the misspelled word, the soundslike//        equivalent of the two words, and the phoneme of the two words.//        The edit distance is the weighed total of the number of//        deletions, insertions, exchanges, or adjacent swaps needed to//        make one string equivalent to the other.//// Please note that the soundlike equivalent is a rough approximation// of how the words sounds. It is not the phoneme of the word by any// means.  For more information on the metaphone algorithm please see// the file metaphone.cc which included a detailed description of it.// NOTES TO SELF// Consider jump starting the distance alogo. when to words have the same// prefix// Consider short circling the distance algo. when to words have the same// suffix#include "getdata.hpp"#include "fstream.hpp"#include "aspeller.hpp"#include "asuggest.hpp"#include "basic_list.hpp"#include "clone_ptr-t.hpp"#include "config.hpp"#include "data.hpp"#include "editdist.hpp"#include "editdist2.hpp"#include "file_data_util.hpp"#include "errors.hpp"#include "hash-t.hpp"#include "language.hpp"#include "leditdist.hpp"#include "speller_impl.hpp"#include "suggest.hpp"//#define DEBUG_SUGGESTusing namespace aspeller;using namespace acommon;using namespace std;namespace aspeller_default_suggest {  typedef vector<String> NearMissesFinal;  template <class Iterator>  inline Iterator preview_next (Iterator i) {    return ++i;  }    //  // OriginalWord stores infomation about the original misspelled word  //   for convince and speed.  //  struct OriginalWord {    String   word;    String   word_stripped;    String   soundslike;    String   phoneme;    CasePattern  case_pattern;    OriginalWord() {}    OriginalWord (const String &w, const String &sl, const String &p)      : word(w), soundslike(sl), phoneme(p) {}    OriginalWord (const String &w, const String &sl, const String &p,		 const String &l, CasePattern cp)      : word(w), word_stripped(l), soundslike(sl), phoneme(p), case_pattern(cp)    {}  };  //  // struct ScoreWordSound - used for storing the possible words while  //   they are being processed.  //  struct ScoreWordSound {    const char *  word;    const char *  word_stripped;    int           score;    int           soundslike_score;    bool          count;    ReplacementList::VirEmul * repl_list;    ScoreWordSound() {repl_list = 0;}    ~ScoreWordSound() {delete repl_list;}  };  inline int compare (const ScoreWordSound &lhs, 		      const ScoreWordSound &rhs)   {    int temp = lhs.score - rhs.score;    if (temp) return temp;    return strcmp(lhs.word,rhs.word);  }  inline bool operator < (const ScoreWordSound & lhs, 			  const ScoreWordSound & rhs) {    return compare(lhs, rhs) < 0;  }  inline bool operator <= (const ScoreWordSound & lhs, 			   const ScoreWordSound & rhs) {    return compare(lhs, rhs) <= 0;  }  inline bool operator == (const ScoreWordSound & lhs, 			   const ScoreWordSound & rhs) {    return compare(lhs, rhs) == 0;  }  typedef BasicList<ScoreWordSound> NearMisses;   class Score {  protected:    const Language * lang;    OriginalWord      original_word;    SuggestParms     parms;  public:    Score(const Language *l, const String &w, const SuggestParms & p)      : lang(l), original_word(w, l->to_soundslike(w.c_str()), 			      l->to_phoneme(w.c_str()),			      to_stripped(*l, w),			      case_pattern(*l, w)),      parms(p)    {}    String fix_case(const String & word) {      return aspeller::fix_case(*lang,original_word.case_pattern,word);    }  };  class Working : public Score {       int threshold;    unsigned int max_word_length;    SpellerImpl  *     speller;    NearMisses         scored_near_misses;    NearMisses         near_misses;    NearMissesFinal  * near_misses_final;    BasicList<String>      strings;    static const bool do_count = true;    static const bool dont_count = false;    static const bool do_need_alloc = true;    static const bool dont_need_alloc = false;    void try_sound(const char *, int ms);    void add_nearmiss(const char * word, int ms, bool count, 		      bool need_alloc, ReplacementList::VirEmul * rl = 0) {      near_misses.push_front(ScoreWordSound());      ScoreWordSound & d = near_misses.front();      if (need_alloc) {	strings.push_front(word);	d.word = strings.front().c_str();      } else {	d.word = word;      }      if (parms.use_typo_analysis) {	unsigned int l = strlen(word);	if (l > max_word_length) max_word_length = l;	      } else {	if (!is_stripped(*lang,word)) {	  strings.push_front(to_stripped(*lang,word));	  d.word_stripped = strings.front().c_str();	} else {	  d.word_stripped = d.word;	}      }      d.soundslike_score = ms;      d.count = count;      d.repl_list = rl;    }    int needed_level(int want, int soundslike_score) {      int n = (100*want - parms.soundslike_weight*soundslike_score)	/(parms.word_weight*parms.edit_distance_weights.min);      return n > 0 ? n : 0;    }    int weighted_average(int soundslike_score, int word_score) {      return (parms.word_weight*word_score 	      + parms.soundslike_weight*soundslike_score)/100;    }    int skip_first_couple(NearMisses::iterator & i) {      int k = 0;      while (preview_next(i) != scored_near_misses.end()) 	// skip over the first couple of items as they should	// not be counted in the threshold score.      {	if (!i->count) {	  ++i;	} else if (k == parms.skip) {	  break;	} else {	  ++k;	  ++i;	}      }      return k;    }    void try_others();    void score_list();    void transfer();  public:    Working(SpellerImpl * m, const Language *l,	    const String & w, const SuggestParms & p)      : Score(l,w,p), threshold(1), max_word_length(0), speller(m) {}    void get_suggestions(NearMissesFinal &sug);    void get_suggestions_ultra(NearMissesFinal &sug);  };  //  // try_sound - tries the soundslike string if there is a match add   //    the possable words to near_misses  //  void Working::try_sound (const char * m, int ms)    {    // sound is the object in the list which is a lot smaller than m    for (SpellerImpl::DataSetCollection::const_iterator i 	   = speller->data_set_collection().begin();	 i != speller->data_set_collection().end();	 ++i) {            if (!i->use_to_suggest) continue;      if (i->data_set->basic_type == DataSet::basic_word_set) {	BasicWordSet::Emul e = static_cast<const BasicWordSet *>	  (i->data_set)->words_w_soundslike(m);	BasicWordInfo w;	String word;	while ((w = e.next())) {	  w.get_word(word, i->local_info.convert);	  add_nearmiss(word.c_str(), ms, do_count, do_need_alloc);	}	      } else {	BasicReplacementSet::Emul e = static_cast<const BasicReplacementSet *>(i->data_set)->repls_w_soundslike(m);	ReplacementList repl;	while (! (repl = e.next()).empty() )	  add_nearmiss(repl.misspelled_word, ms, 		       dont_count, dont_need_alloc, repl.elements);	        }    }  }  //  // try_others - tries to come up with possible suggestions  //    void Working::try_others () {    const char *replace_list = lang->soundslike_chars();    const String & word       = original_word.word;    const String & soundslike = original_word.soundslike;        String::size_type i;        String new_soundslike;    new_soundslike.reserve(soundslike.size() + 1);    char a,b;    const char * c;    // Insert a space or hyphone    if (word.size() >= 4) {      char * new_word = new char[word.size() + 2];      strncpy(new_word, word.data(), word.size());      new_word[word.size() + 1] = '\0';      new_word[word.size() + 0] = new_word[word.size() - 1];      for (i = word.size() - 2; i >= 2; --i) {	new_word[i+1] = new_word[i];	new_word[i] = '\0';		if (speller->check(new_word) && speller->check(new_word + i + 1)) {	  new_word[i] = ' ';	  add_nearmiss(new_word, parms.edit_distance_weights.del2,		       dont_count, do_need_alloc);	  new_word[i] = '-';	  add_nearmiss(new_word, parms.edit_distance_weights.del2,		       dont_count, do_need_alloc);	}      }            delete[] new_word;    }    if (parms.soundslike_level == 1) {            try_sound(soundslike.c_str(), 0);      // Change one letter            new_soundslike = soundslike;      for (i = 0; i != soundslike.size(); ++i) {	for (c=replace_list; *c; ++c) {	  if (*c == soundslike[i]) continue;	  new_soundslike[i] = *c;	  try_sound(new_soundslike.c_str(),parms.edit_distance_weights.sub);	}	new_soundslike[i] = soundslike[i];      }      // Interchange two adjacent letters.      for (i = 0; i+1 != soundslike.size(); ++i) {	a = new_soundslike[i];	b = new_soundslike[i+1];	new_soundslike[i] = b;	new_soundslike[i+1] = a;	try_sound(new_soundslike.c_str(),parms.edit_distance_weights.swap);	new_soundslike[i] = a;	new_soundslike[i+1] = b;      }      // Add one letter      new_soundslike += ' ';      i = new_soundslike.size()-1;      while(true) {	for (c=replace_list; *c; ++c) {	  new_soundslike[i] = *c;	  try_sound(new_soundslike.c_str(),parms.edit_distance_weights.del1);	}	if (i == 0) break;	new_soundslike[i] = new_soundslike[i-1];	--i;      }          // Delete one letter      if (soundslike.size() > 1) {	new_soundslike = soundslike;	a = new_soundslike[new_soundslike.size() - 1];	new_soundslike.resize(new_soundslike.size() - 1);	i = new_soundslike.size();	while (true) {	  try_sound(new_soundslike.c_str(),parms.edit_distance_weights.del2);	  if (i == 0) break;	  b = a;	  a = new_soundslike[i-1];	  new_soundslike[i-1] = b;	  --i;	}      }    } else {      const char * original_soundslike = original_word.soundslike.c_str();      for (SpellerImpl::DataSetCollection::const_iterator i 	     = speller->data_set_collection().begin();	   i != speller->data_set_collection().end();	   ++i) {	if (!i->use_to_suggest) continue;      	if (i->data_set->basic_type == DataSet::basic_word_set) {	  const BasicWordSet * data_set 	    = static_cast<const BasicWordSet *>(i->data_set);	  BasicWordSet::SoundslikeEmul els = data_set->soundslike_elements();    	  SoundslikeWord sw;	  while ( (sw = els.next()) == true) {	    int score = limit2_edit_distance(original_soundslike, 					     sw.soundslike,					     parms.edit_distance_weights);	    if (score < LARGE_NUM) {	      BasicWordSet::Emul e = data_set->words_w_soundslike(sw);	      BasicWordInfo bw;	      String word;	      while ((bw = e.next())) {		bw.get_word(word, i->local_info.convert);		add_nearmiss(word.c_str(), score, do_count, do_need_alloc);	      }	    }	  }	} else {		  const BasicReplacementSet * repl_set	    = static_cast<const BasicReplacementSet *>(i->data_set);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -