⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 suggest.cpp

📁 unix/linux下拼写检查程序源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	  BasicWordSet::SoundslikeEmul els = repl_set->soundslike_elements();		  SoundslikeWord w;	  while ( (w = els.next()) == true) {	    int score = limit2_edit_distance(original_soundslike, 					     w.soundslike,					     parms.edit_distance_weights);	  	    if (score < LARGE_NUM) {	      BasicReplacementSet::Emul e = repl_set->repls_w_soundslike(w);	      ReplacementList repl;	      while (! (repl = e.next()).empty() )		add_nearmiss(repl.misspelled_word, score, 			     dont_count, dont_need_alloc, repl.elements);	    }	  	  }		}      }    }  }  void Working::score_list() {    if (near_misses.empty()) return;    bool no_soundslike = strcmp(speller->lang().soundslike_name(), "none") == 0;    if (parms.use_typo_analysis) {            parms.set_original_word_size(original_word.word.size());            NearMisses::iterator i;      int word_score;            unsigned int j;      vector<unsigned char> original(original_word.word.size() + 1);      for (j = 0; j != original_word.word.size(); ++j)	original[j] = lang->to_normalized(original_word.word[j]);      original[j] = 0;      vector<unsigned char> word(max_word_length + 1);            for (i = near_misses.begin(); i != near_misses.end(); ++i) {	for (j = 0; (i->word)[j] != 0; ++j)	  word[j] = lang->to_normalized((i->word)[j]);	word[j] = 0;	word_score = typo_edit_distance(&*word.begin(), &*original.begin(),					parms.typo_edit_distance_weights);	i->score = weighted_average(i->soundslike_score, word_score);      }      near_misses.swap(scored_near_misses);      scored_near_misses.sort();            i = scored_near_misses.begin();            if (i == scored_near_misses.end()) return;            skip_first_couple(i);            threshold = i->score + parms.span;      if (threshold < parms.edit_distance_weights.max)	threshold = parms.edit_distance_weights.max;          } else {	      parms.set_original_word_size(original_word.word.size());            NearMisses::iterator i;      NearMisses::iterator prev;      int word_score;            near_misses.push_front(ScoreWordSound());      // the first item will NEVER be looked at.      scored_near_misses.push_front(ScoreWordSound());      scored_near_misses.front().score = -1;      // this item will only be looked at when sorting so       // make it a small value to keep it at the front.      int try_for = (parms.word_weight*parms.edit_distance_weights.max)/100;      while (true) {	try_for += (parms.word_weight*parms.edit_distance_weights.max)/100;		// put all pairs whose score <= initial_limit*max_weight	// into the scored list	prev = near_misses.begin();	i = prev;	++i;	while (i != near_misses.end()) {	  int level = needed_level(try_for, i->soundslike_score);		  if (no_soundslike)	    word_score = i->soundslike_score;	  else if (level >= int(i->soundslike_score/parms.edit_distance_weights.min))	    word_score = edit_distance(original_word.word_stripped.c_str(),				       i->word_stripped,				       level, level,				       parms.edit_distance_weights);	  else	    word_score = LARGE_NUM;	  	  if (word_score < LARGE_NUM) {	    i->score = weighted_average(i->soundslike_score, word_score);	    	    scored_near_misses.splice_into(near_misses,prev,i);	    	    i = prev; // Yes this is right due to the slice	    ++i;	    	  } else {	    	    prev = i;	    ++i;	    	  }	}		scored_near_misses.sort();		i = scored_near_misses.begin();	++i;		if (i == scored_near_misses.end()) continue;		int k = skip_first_couple(i);		if ((k == parms.skip && i->score <= try_for) 	    || prev == near_misses.begin() ) // or no more left in near_misses	  break;      }            threshold = i->score + parms.span;      if (threshold < parms.edit_distance_weights.max)	threshold = parms.edit_distance_weights.max;#  ifdef DEBUG_SUGGEST      cout << "Threshold is: " << threshold << endl;      cout << "try_for: " << try_for << endl;      cout << "Size of scored: " << scored_near_misses.size() << endl;      cout << "Size of ! scored: " << near_misses.size() << endl;#  endif            //if (threshold - try_for <=  parms.edit_distance_weights.max/2) return;            prev = near_misses.begin();      i = prev;      ++i;      while (i != near_misses.end()) {		int initial_level = needed_level(try_for, i->soundslike_score);	int max_level = needed_level(threshold, i->soundslike_score);		if (no_soundslike)	  word_score = i->soundslike_score;	else if (initial_level < max_level)	  word_score = edit_distance(original_word.word_stripped.c_str(),				     i->word_stripped,				     initial_level+1,max_level,				     parms.edit_distance_weights);	else	  word_score = LARGE_NUM;		if (word_score < LARGE_NUM) {	  i->score = weighted_average(i->soundslike_score, word_score);	  	  scored_near_misses.splice_into(near_misses,prev,i);	  	  i = prev; // Yes this is right due to the slice	  ++i;	  	} else {	  	  prev = i;	  ++i;	}      }            scored_near_misses.sort();      scored_near_misses.pop_front();    }  }  void Working::transfer() {#  ifdef DEBUG_SUGGEST    cout << endl << endl 	 << original_word.word << '\t' 	 << original_word.soundslike << '\t'	 << endl;#  endif    int c = 1;    hash_set<String,HashString<String> > duplicates_check;    String final_word;    pair<hash_set<String,HashString<String> >::iterator, bool> dup_pair;    for (NearMisses::const_iterator i = scored_near_misses.begin();	 i != scored_near_misses.end() && c <= parms.limit	   && ( i->score <= threshold || c <= 3 );	 ++i, ++c) {#    ifdef DEBUG_SUGGEST      cout << i->word << '\t' << i->score            << '\t' << lang->to_soundslike(i->word) << endl;#    endif      if (i->repl_list != 0) {	const char * word;	string::size_type pos;	while((word = i->repl_list->next()) != 0) {	  dup_pair = duplicates_check.insert(fix_case(word));	  if (dup_pair.second && 	      ((pos = dup_pair.first->find(' '), pos == String::npos)	       ? (bool)speller->check(*dup_pair.first)	       : (speller->check((String)dup_pair.first->substr(0,pos)) 		  && speller->check((String)dup_pair.first->substr(pos+1))) ))	    near_misses_final->push_back(*dup_pair.first);	}      } else {	dup_pair = duplicates_check.insert(fix_case(i->word));	if (dup_pair.second )	  near_misses_final->push_back(*dup_pair.first);      }    }  }    void Working::get_suggestions(NearMissesFinal & sug) {    near_misses_final = & sug;    if (original_word.soundslike.empty()) return;    try_others();    score_list();    transfer();  }    class SuggestionListImpl : public SuggestionList {    struct Parms {      typedef const char *                    Value;      typedef NearMissesFinal::const_iterator Iterator;      Iterator end;      Parms(Iterator e) : end(e) {}      bool endf(Iterator e) const {return e == end;}      Value end_state() const {return 0;}      Value deref(Iterator i) const {return i->c_str();}    };  public:    NearMissesFinal suggestions;    SuggestionList * clone() const {return new SuggestionListImpl(*this);}    void assign(const SuggestionList * other) {      *this = *static_cast<const SuggestionListImpl *>(other);    }    bool empty() const { return suggestions.empty(); }    Size size() const { return suggestions.size(); }    VirEmul * elements() const {      return new MakeVirEnumeration<Parms, StringEnumeration>	(suggestions.begin(), Parms(suggestions.end()));    }  };  class SuggestImpl : public Suggest {    SpellerImpl * speller_;    SuggestionListImpl  suggestion_list;    SuggestParms parms_;  public:    SuggestImpl(SpellerImpl * m)       : speller_(m), parms_(m->config()->retrieve("sug-mode"))     {parms_.fill_distance_lookup(m->config(), m->lang());}    SuggestImpl(SpellerImpl * m, const SuggestParms & p)       : speller_(m), parms_(p)     {parms_.fill_distance_lookup(m->config(), m->lang());}    PosibErr<void> set_mode(ParmString mode) {      return parms_.set(mode);    }    double score(const char *base, const char *other) {      //parms_.set_original_word_size(strlen(base));      //Score s(&speller_->lang(),base,parms_);      //string sl = speller_->lang().to_soundslike(other);      //ScoreWordSound sws(other, sl.c_str());      //s.score(sws);      //return sws.score;      return -1;    }    SuggestionList & suggest(const char * word);  };  SuggestionList & SuggestImpl::suggest(const char * word) { #   ifdef DEBUG_SUGGEST    cout << "=========== begin suggest " << word << " ===========\n";#   endif    parms_.set_original_word_size(strlen(word));    suggestion_list.suggestions.resize(0);    Working sug(speller_, &speller_->lang(),word,parms_);    sug.get_suggestions(suggestion_list.suggestions);#   ifdef DEBUG_SUGGEST    cout << "^^^^^^^^^^^  end suggest " << word << "  ^^^^^^^^^^^\n";#   endif    return suggestion_list;  }  }namespace aspeller {  Suggest * new_default_suggest(SpellerImpl * m) {    return new aspeller_default_suggest::SuggestImpl(m);  }  Suggest * new_default_suggest(SpellerImpl * m, const SuggestParms & p) {    return new aspeller_default_suggest::SuggestImpl(m,p);  }  PosibErr<void> SuggestParms::set(ParmString mode) {    if (mode != "normal" && mode != "fast" && mode != "ultra" && mode != "bad-spellers")      return make_err(bad_value, "sug-mode", mode, "one of ultra, fast, normal, or bad-spellers");    edit_distance_weights.del1 =  95;    edit_distance_weights.del2 =  95;    edit_distance_weights.swap =  90;    edit_distance_weights.sub =  100;    edit_distance_weights.similar = 10;    edit_distance_weights.max = 100;    edit_distance_weights.min =  90;    normal_soundslike_weight = 50;    small_word_soundslike_weight = 15;    small_word_threshold = 4;    soundslike_weight = normal_soundslike_weight;    word_weight       = 100 - normal_soundslike_weight;          skip = 2;    limit = 100;    if (mode == "normal") {      use_typo_analysis = true;      soundslike_level = 2; // either one or two      span = 50;    } else if (mode == "fast") {      use_typo_analysis = true;      soundslike_level = 1; // either one or two      span = 50;    } else if (mode == "ultra") {      use_typo_analysis = false;      soundslike_level = 1; // either one or two      span = 50;    } else if (mode == "bad-spellers") {      use_typo_analysis = false;      normal_soundslike_weight = 55;      small_word_threshold = 0;      soundslike_level = 2; // either one or two      span = 125;      limit = 1000;    } else {      abort(); // this should NEVER happen.    }    return no_err;  }  PosibErr<void> SuggestParms::fill_distance_lookup(const Config * c, const Language & l) {        TypoEditDistanceWeights & w = typo_edit_distance_weights;    String keyboard = c->retrieve("keyboard");    if (keyboard == "none") {            use_typo_analysis = false;          } else {      FStream in;      String file, dir1, dir2;      fill_data_dir(c, dir1, dir2);      find_file(file, dir1, dir2, keyboard, ".kbd");      RET_ON_ERR(in.open(file.c_str(), "r"));      int c = l.max_normalized() + 1;      w.repl .init(c);      w.extra.init(c);      for (int i = 0; i != c; ++i) {	for (int j = 0; j != c; ++j) {	  w.repl (i,j) = w.repl_dis2;	  w.extra(i,j) = w.extra_dis2;	}      }      String key, data;      while (getdata_pair(in, key, data)) {	if (key.size() != 2) 	  return make_err(bad_file_format, file);	w.repl (l.to_normalized(key[0]),		l.to_normalized(key[1])) = w.repl_dis1;	w.repl (l.to_normalized(key[1]),		l.to_normalized(key[0])) = w.repl_dis1;	w.extra(l.to_normalized(key[0]),		l.to_normalized(key[1])) = w.extra_dis1;	w.extra(l.to_normalized(key[1]),		l.to_normalized(key[0])) = w.extra_dis1;      }      for (int i = 0; i != c; ++i) {	w.repl(i,i) = 0;	w.extra(i,i) = w.extra_dis1;      }          }    return no_err;  }        SuggestParms * SuggestParms::clone() const {    return new SuggestParms(*this);  }  void SuggestParms::set_original_word_size(int size) {    if (size <= small_word_threshold) {      soundslike_weight = small_word_soundslike_weight;    } else {      soundslike_weight = normal_soundslike_weight;    }    word_weight = 100 - soundslike_weight;  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -