⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readonly_ws.cpp

📁 unix/linux下拼写检查程序源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
      for (;w[j] != '\0'; ++j)	buf[j] = lang->to_stripped(w[j]);      buf[j] = '\0';      return v;    }  };  struct ReadOnlyWS::SoundslikeWordsParmsNoSL {    typedef BasicWordInfo                       Value;    typedef WordLookup::ConstFindIterator  Iterator;     const char * word_block_begin;    SoundslikeWordsParmsNoSL(const char * b)       : word_block_begin(b) {}    bool endf(const Iterator & i) const {return i.at_end();}    Value end_state() const {return 0;}    Value deref(const Iterator & i) const     {      return Value(word_block_begin + i.deref(), 		   *(word_block_begin + i.deref() - 1));    }  };  struct ReadOnlyWS::SoundslikeWordsEmulSingle : public ReadOnlyWS::VirEmul  {  private:    const char * word;  public:    SoundslikeWordsEmulSingle(const char * w) : word(w) {}    VirEmul * clone() const {return new SoundslikeWordsEmulSingle(*this);}    void assign(const VirEmul * other) {      *this = *static_cast<const SoundslikeWordsEmulSingle *>(other);    }    bool at_end() const {return word == 0;}    BasicWordInfo next() {      const char * w = word;      if (w != 0) {	word = 0;	return BasicWordInfo(w, *(w-1));      } else {	return BasicWordInfo();      }    }  };        ReadOnlyWS::VirSoundslikeEmul * ReadOnlyWS::soundslike_elements() const {    if (use_soundslike) {            return new MakeVirEnumeration<SoundslikeElementsParms>	(soundslike_lookup.begin(), soundslike_block);    } else {      return new MakeVirEnumeration<SoundslikeElementsParmsNoSL>	(word_lookup.begin(), 	 SoundslikeElementsParmsNoSL(max_word_length,block,lang()));          }  }      ReadOnlyWS::VirEmul *   ReadOnlyWS::words_w_soundslike(const char * soundslike) const {    if (use_soundslike) {      SoundslikeLookup::const_iterator i = soundslike_lookup.find(soundslike);      if (i == soundslike_lookup.end()) { 	return new MakeAlwaysEndEnumeration<BasicWordInfo>();      } else {	return ReadOnlyWS::words_w_soundslike	  (SoundslikeWord(soundslike_block + *i, 0));      }    } else {   
   WordLookup::ConstFindIterator i = word_lookup.multi_find(soundslike);      return new MakeVirEnumeration<SoundslikeWordsParmsNoSL>(i, block);          }      }    ReadOnlyWS::VirEmul *  ReadOnlyWS::words_w_soundslike(SoundslikeWord w) const {    if (use_soundslike) {          const u32int * end = reinterpret_cast<const u32int *>(w.soundslike - 2);      u16int size = *reinterpret_cast<const u16int *>(end);            return new MakeVirEnumeration<SoundslikeWordsParms>	(end - size, SoundslikeWordsParms(word_block, end));    } else {            return new SoundslikeWordsEmulSingle	(static_cast<const char *>(w.word_list_pointer));          }  }}  namespace aspeller {  BasicWordSet * new_default_readonly_word_set() {    return new aspeller_default_readonly_ws::ReadOnlyWS();  }  }namespace aspeller_default_readonly_ws {  using namespace aspeller;  struct WordLookupParms {    typedef vector<const char *> Vector;    typedef const char *         Value;    typedef const char *         Key;    static const bool is_multi = true;    const Key & key(const Value & v) const {return v;}    InsensitiveHash hash_;    size_t hash(const Key & k) const {return hash_(k);}    InsensitiveEqual equal_;    bool equal(const Key & rhs, const Key & lhs) const {      return equal_(rhs, lhs);    }    bool is_nonexistent(const Value & v) const {return v == 0;}    void make_nonexistent(Value & v) const {v = 0;}  };  typedef VectorHashTable<WordLookupParms> WordHash;  struct SoundslikeLookupParms {    typedef const char *                Key;    struct List {      union {	u32int * list;	u32int   single;      } d;      u16int   size;      u16int   num_inserted;    };    typedef pair<Key, List> Value;    typedef vector<Value>   Vector;    static const bool is_multi = false;    const Key & key(const Value & v) const {return v.first;}    acommon::hash<const char *>  hash;    bool equal(const Key & rhs, const Key & lhs) const {return strcmp(rhs,lhs) == 0;}    bool is_nonexistent(const Value & v) const 
{return v.first == 0;}    void make_nonexistent(Value & v) const {      memset(&v, 0, sizeof(Value));    }  };  typedef VectorHashTable<SoundslikeLookupParms> SoundHash;  static inline unsigned int round_up(unsigned int i, unsigned int size) {    return ((i + size - 1)/size)*size;  }  static void advance_file(FStream & out, int pos) {    int diff = pos - out.tell();    assert(diff >= 0);    for(; diff != 0; --diff)      out << '\0';  }    PosibErr<void> create (ParmString base, 			 StringEnumeration * els,			 const Language & lang)   {    size_t page_size = ::page_size();        assert(sizeof(u16int) == 2);    assert(sizeof(u32int) == 4);    bool use_soundslike=true;    if (strcmp(lang.soundslike_name(),"none") == 0)      use_soundslike=false;    const char * mid_chars = lang.mid_chars();    FStream out;       out.open(base, "wb");    DataHead data_head;    memset(&data_head, 0, sizeof(data_head));    strcpy(data_head.check_word, "aspell default speller rowl 1.4");    data_head.lang_name_size          = strlen(lang.name()) + 1;    data_head.soundslike_name_size    = strlen(lang.soundslike_name()) + 1;    data_head.soundslike_version_size = strlen(lang.soundslike_version()) + 1;    data_head.middle_chars_size       = strlen(mid_chars) + 1;    data_head.head_size  = sizeof(DataHead);    data_head.head_size += data_head.lang_name_size;    data_head.head_size += data_head.soundslike_name_size;    data_head.head_size += data_head.soundslike_version_size;    data_head.head_size  = round_up(data_head.head_size, page_size);    data_head.minimal_specified = u32int_max;    String temp;    SoundHash sound_prehash;    StringBuffer   sound_prehash_char_buf;    vector<u32int> sound_prehash_list_buf;    int            sound_prehash_list_buf_size = 0;    {      WordHash word_hash;      StringBuffer buf;      word_hash.parms().hash_ .lang = &lang;      word_hash.parms().equal_.lang = &lang;      const char * w0;      WordHash::MutableFindIterator j;      int z = 0;      //      // 
Reading in Wordlist from stdin and creating Word Hash      //      while ( (w0 = els->next()) != 0) {	unsigned int s = strlen(w0);	vector<char> tstr(w0, w0+s+1);	char * w = &tstr[0];		char * p = strchr(w, ':');	if (p == 0) {	  p = w + s;	} else {	  s = p - w;	  *p = '\0';	  ++p;	}		check_if_valid(lang,w);	// Read in compound info		CompoundInfo c;	if (*c.read(p, lang) != '\0')	  return make_err(invalid_flag, w, p);		// Check if it already has been inserted	for (j =word_hash.multi_find(w); !j.at_end(); j.adv())	  if (strcmp(w, j.deref())==0) break;	// If already insert deal with compound info	bool reinsert=false;	if (!j.at_end()) {	  CompoundInfo c0(static_cast<unsigned char>(*(j.deref() - 1)));	  if (c.any() && !c0.any())	    reinsert = true;	  else if (!c.any() || !c0.any())	    ;	  else if (c.d != c0.d)	    abort(); // FIXME	    //return make_err(conflicting_flags, w, c0, c, lang);	}		// Finally insert the word into the dictionary	if (j.at_end() || reinsert) {	  if(s > data_head.max_word_length)	    data_head.max_word_length = s;	  char * b;	  if (c.any()) {	    if (s < data_head.minimal_specified)	      data_head.minimal_specified = s;	    b = buf.alloc(s + 2);	    *b = static_cast<char>(c.d);	    ++b;	  } else {	    b = buf.alloc(s + 1);	  }	  strncpy(b, w, s+1);	  word_hash.insert(b);	}	++z;      }      delete els;            word_hash.resize(word_hash.size()*4/5);            //      // Witting word data, creating Final Hash, creating sounds Pre Hash      //            advance_file(out, data_head.head_size);      long int start = data_head.head_size;      if (use_soundslike)	sound_prehash.resize(word_hash.bucket_count());            vector<u32int> final_hash(word_hash.bucket_count(), u32int_max);            out << '\0';      for (unsigned int i = 0; i != word_hash.vector().size(); ++i) {		const char * value = word_hash.vector()[i];		if (word_hash.parms().is_nonexistent(value)) continue;	// write compound info	if (*(value - 1) != '\0')	  out << *(value-1);	
final_hash[i] = out.tell() - start;	out << value << '\0';	if (use_soundslike) {	  temp = lang.to_soundslike(value);	  SoundHash::iterator j = sound_prehash.find(temp.c_str());	  if (j == sound_prehash.end()) {	    SoundHash::value_type to_insert;	    to_insert.first = sound_prehash_char_buf.alloc(temp.size()+1);	    strncpy(const_cast<char *>(to_insert.first), 		    temp.c_str(), 		    temp.size() + 1);	    sound_prehash.insert(to_insert).first->second.size = 1;	  } else {	    if (j->second.size == 1)	      sound_prehash_list_buf_size++;	    	    j->second.size++;	    sound_prehash_list_buf_size++;	  }	}      }      if (use_soundslike) {	sound_prehash_list_buf.resize(sound_prehash_list_buf_size);	int p = 0;	for (unsigned int i = 0; i != word_hash.vector().size(); ++i) {	  	  const char * value = word_hash.vector()[i];	  	  if (word_hash.parms().is_nonexistent(value)) continue;	  	  temp = lang.to_soundslike(value);	  SoundHash::iterator j = sound_prehash.find(temp.c_str());	  //assert(j != sound_prehash.end());	  if (j->second.num_inserted == 0 && j->second.size != 1) {	    j->second.d.list = &*sound_prehash_list_buf.begin() + p;	    p += j->second.size;	  } 	  if (j->second.size == 1) {	    j->second.d.single = final_hash[i];	  } else {	    j->second.d.list[j->second.num_inserted] = final_hash[i];	  }	  ++j->second.num_inserted;	}      }            data_head.word_block_size = round_up(out.tell() - start + 1l, 					   page_size);      data_head.total_block_size = data_head.word_block_size;      advance_file(out, data_head.head_size + data_head.total_block_size);      // Writting final hash      out.write(reinterpret_cast<const char *>(&final_hash.front()),		final_hash.size() * 4);      data_head.word_count   = word_hash.size();      data_head.word_buckets = word_hash.bucket_count();      data_head.word_size    	= round_up(word_hash.bucket_count() * 4, page_size);      data_head.total_block_size += data_head.word_size;      advance_file(out, data_head.head_size + 
data_head.total_block_size);    }        if (use_soundslike) {      sound_prehash.resize(sound_prehash.size()*4/5);          vector<u32int> final_hash(sound_prehash.bucket_count(), u32int_max);      long int start = out.tell();            //      // Writting soundslike words, creating soundslike Final Hash      //      for (unsigned int i = 0; i != sound_prehash.vector().size(); ++i) {		const SoundHash::value_type & value = sound_prehash.vector()[i];		if (sound_prehash.parms().is_nonexistent(value)) continue;	u16int count = value.second.size;	if (count == 1) {	  out.write(reinterpret_cast<const char *>(&value.second.d.single), 		    4);	} else {	  out.write(reinterpret_cast<const char *>(value.second.d.list),		    count * 4);	}	out.write(reinterpret_cast<char *>(&count),2);	final_hash[i] = out.tell() - start;	out << value.first << '\0';	advance_file(out, round_up(out.tell(), 4));      }      data_head.soundslike_block_size 	= round_up(out.tell() - start, page_size);      data_head.total_block_size += data_head.soundslike_block_size;      // Witting Final soundslike Hash      advance_file(out, data_head.head_size + data_head.total_block_size);      out.write(reinterpret_cast<char *>(&final_hash.front()),		final_hash.size() * 4);      data_head.soundslike_count   = sound_prehash.size();      data_head.soundslike_buckets = sound_prehash.bucket_count();      data_head.soundslike_size    	= round_up(final_hash.size() * 4, page_size);      data_head.total_block_size += data_head.soundslike_size;        }    advance_file(out, data_head.head_size + data_head.total_block_size);    // write data head to file    out.restart();    out.write((char *)&data_head, sizeof(DataHead));    out.write(lang.name(), data_head.lang_name_size);    out.write(lang.soundslike_name(), data_head.soundslike_name_size);    out.write(lang.soundslike_version(), data_head.soundslike_version_size);    out.write(mid_chars, data_head.middle_chars_size);     return no_err;  }}namespace aspeller {  
// Builds the default read-only word set from the word list `els`.
//
//   els     source of the words; handed on to
//           aspeller_default_readonly_ws::create
//   config  used to set up the Language and to retrieve "master-path",
//           the file that gets written
//
// Returns the error from Language::setup if that fails, otherwise the
// result of create().  The result of create() used to be discarded, so
// a failure while building the word set was reported as success.
//
// NOTE(review): when lang.setup() fails, els is never passed on and is
// not released here -- confirm who owns it on that path.
PosibErr<void> create_default_readonly_word_set(StringEnumeration * els,
                                                Config & config)
{
  Language lang;
  RET_ON_ERR(lang.setup("",&config));
  return aspeller_default_readonly_ws::create(config.retrieve("master-path"),
                                              els,lang);
}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -