📄 readonly_ws.cpp
字号:
for (;w[j] != '\0'; ++j) buf[j] = lang->to_stripped(w[j]); buf[j] = '\0'; return v; } }; struct ReadOnlyWS::SoundslikeWordsParmsNoSL { typedef BasicWordInfo Value; typedef WordLookup::ConstFindIterator Iterator; const char * word_block_begin; SoundslikeWordsParmsNoSL(const char * b) : word_block_begin(b) {} bool endf(const Iterator & i) const {return i.at_end();} Value end_state() const {return 0;} Value deref(const Iterator & i) const { return Value(word_block_begin + i.deref(), *(word_block_begin + i.deref() - 1)); } }; struct ReadOnlyWS::SoundslikeWordsEmulSingle : public ReadOnlyWS::VirEmul { private: const char * word; public: SoundslikeWordsEmulSingle(const char * w) : word(w) {} VirEmul * clone() const {return new SoundslikeWordsEmulSingle(*this);} void assign(const VirEmul * other) { *this = *static_cast<const SoundslikeWordsEmulSingle *>(other); } bool at_end() const {return word == 0;} BasicWordInfo next() { const char * w = word; if (w != 0) { word = 0; return BasicWordInfo(w, *(w-1)); } else { return BasicWordInfo(); } } }; ReadOnlyWS::VirSoundslikeEmul * ReadOnlyWS::soundslike_elements() const { if (use_soundslike) { return new MakeVirEnumeration<SoundslikeElementsParms> (soundslike_lookup.begin(), soundslike_block); } else { return new MakeVirEnumeration<SoundslikeElementsParmsNoSL> (word_lookup.begin(), SoundslikeElementsParmsNoSL(max_word_length,block,lang())); } } ReadOnlyWS::VirEmul * ReadOnlyWS::words_w_soundslike(const char * soundslike) const { if (use_soundslike) { SoundslikeLookup::const_iterator i = soundslike_lookup.find(soundslike); if (i == soundslike_lookup.end()) { return new MakeAlwaysEndEnumeration<BasicWordInfo>(); } else { return ReadOnlyWS::words_w_soundslike (SoundslikeWord(soundslike_block + *i, 0)); } } else { WordLookup::ConstFindIterator i = word_lookup.multi_find(soundslike); return new MakeVirEnumeration<SoundslikeWordsParmsNoSL>(i, block); } } ReadOnlyWS::VirEmul * ReadOnlyWS::words_w_soundslike(SoundslikeWord w) const { if (use_soundslike) { const u32int * end = reinterpret_cast<const u32int *>(w.soundslike - 2); u16int size = *reinterpret_cast<const u16int *>(end); return new MakeVirEnumeration<SoundslikeWordsParms> (end - size, SoundslikeWordsParms(word_block, end)); } else { return new SoundslikeWordsEmulSingle (static_cast<const char *>(w.word_list_pointer)); } }} namespace aspeller { BasicWordSet * new_default_readonly_word_set() { return new aspeller_default_readonly_ws::ReadOnlyWS(); } }namespace aspeller_default_readonly_ws { using namespace aspeller; struct WordLookupParms { typedef vector<const char *> Vector; typedef const char * Value; typedef const char * Key; static const bool is_multi = true; const Key & key(const Value & v) const {return v;} InsensitiveHash hash_; size_t hash(const Key & k) const {return hash_(k);} InsensitiveEqual equal_; bool equal(const Key & rhs, const Key & lhs) const { return equal_(rhs, lhs); } bool is_nonexistent(const Value & v) const {return v == 0;} void make_nonexistent(Value & v) const {v = 0;} }; typedef VectorHashTable<WordLookupParms> WordHash; struct SoundslikeLookupParms { typedef const char * Key; struct List { union { u32int * list; u32int single; } d; u16int size; u16int num_inserted; }; typedef pair<Key, List> Value; typedef vector<Value> Vector; static const bool is_multi = false; const Key & key(const Value & v) const {return v.first;} acommon::hash<const char *> hash; bool equal(const Key & rhs, const Key & lhs) const {return strcmp(rhs,lhs) == 0;} bool is_nonexistent(const Value & v) const {return v.first == 0;} void make_nonexistent(Value & v) const { memset(&v, 0, sizeof(Value)); } }; typedef VectorHashTable<SoundslikeLookupParms> SoundHash; static inline unsigned int round_up(unsigned int i, unsigned int size) { return ((i + size - 1)/size)*size; } static void advance_file(FStream & out, int pos) { int diff = pos - out.tell(); assert(diff >= 0); for(; diff != 0; --diff) out << '\0'; } PosibErr<void> create (ParmString base, StringEnumeration * els, const Language & lang) { size_t page_size = ::page_size(); assert(sizeof(u16int) == 2); assert(sizeof(u32int) == 4); bool use_soundslike=true; if (strcmp(lang.soundslike_name(),"none") == 0) use_soundslike=false; const char * mid_chars = lang.mid_chars(); FStream out; out.open(base, "wb"); DataHead data_head; memset(&data_head, 0, sizeof(data_head)); strcpy(data_head.check_word, "aspell default speller rowl 1.4"); data_head.lang_name_size = strlen(lang.name()) + 1; data_head.soundslike_name_size = strlen(lang.soundslike_name()) + 1; data_head.soundslike_version_size = strlen(lang.soundslike_version()) + 1; data_head.middle_chars_size = strlen(mid_chars) + 1; data_head.head_size = sizeof(DataHead); data_head.head_size += data_head.lang_name_size; data_head.head_size += data_head.soundslike_name_size; data_head.head_size += data_head.soundslike_version_size; data_head.head_size = round_up(data_head.head_size, page_size); data_head.minimal_specified = u32int_max; String temp; SoundHash sound_prehash; StringBuffer sound_prehash_char_buf; vector<u32int> sound_prehash_list_buf; int sound_prehash_list_buf_size = 0; { WordHash word_hash; StringBuffer buf; word_hash.parms().hash_ .lang = ⟨ word_hash.parms().equal_.lang = ⟨ const char * w0; WordHash::MutableFindIterator j; int z = 0; // // Reading in Wordlist from stdin and creating Word Hash // while ( (w0 = els->next()) != 0) { unsigned int s = strlen(w0); vector<char> tstr(w0, w0+s+1); char * w = &tstr[0]; char * p = strchr(w, ':'); if (p == 0) { p = w + s; } else { s = p - w; *p = '\0'; ++p; } check_if_valid(lang,w); // Read in compound info CompoundInfo c; if (*c.read(p, lang) != '\0') return make_err(invalid_flag, w, p); // Check if it already has been inserted for (j =word_hash.multi_find(w); !j.at_end(); j.adv()) if (strcmp(w, j.deref())==0) break; // If already insert deal with compound info bool reinsert=false; if (!j.at_end()) { CompoundInfo c0(static_cast<unsigned char>(*(j.deref() - 1))); if (c.any() && !c0.any()) reinsert = true; else if (!c.any() || !c0.any()) ; else if (c.d != c0.d) abort(); // FIXME //return make_err(conflicting_flags, w, c0, c, lang); } // Finally insert the word into the dictionary if (j.at_end() || reinsert) { if(s > data_head.max_word_length) data_head.max_word_length = s; char * b; if (c.any()) { if (s < data_head.minimal_specified) data_head.minimal_specified = s; b = buf.alloc(s + 2); *b = static_cast<char>(c.d); ++b; } else { b = buf.alloc(s + 1); } strncpy(b, w, s+1); word_hash.insert(b); } ++z; } delete els; word_hash.resize(word_hash.size()*4/5); // // Witting word data, creating Final Hash, creating sounds Pre Hash // advance_file(out, data_head.head_size); long int start = data_head.head_size; if (use_soundslike) sound_prehash.resize(word_hash.bucket_count()); vector<u32int> final_hash(word_hash.bucket_count(), u32int_max); out << '\0'; for (unsigned int i = 0; i != word_hash.vector().size(); ++i) { const char * value = word_hash.vector()[i]; if (word_hash.parms().is_nonexistent(value)) continue; // write compound info if (*(value - 1) != '\0') out << *(value-1); final_hash[i] = out.tell() - start; out << value << '\0'; if (use_soundslike) { temp = lang.to_soundslike(value); SoundHash::iterator j = sound_prehash.find(temp.c_str()); if (j == sound_prehash.end()) { SoundHash::value_type to_insert; to_insert.first = sound_prehash_char_buf.alloc(temp.size()+1); strncpy(const_cast<char *>(to_insert.first), temp.c_str(), temp.size() + 1); sound_prehash.insert(to_insert).first->second.size = 1; } else { if (j->second.size == 1) sound_prehash_list_buf_size++; j->second.size++; sound_prehash_list_buf_size++; } } } if (use_soundslike) { sound_prehash_list_buf.resize(sound_prehash_list_buf_size); int p = 0; for (unsigned int i = 0; i != word_hash.vector().size(); ++i) { const char * value = word_hash.vector()[i]; if (word_hash.parms().is_nonexistent(value)) continue; temp = lang.to_soundslike(value); SoundHash::iterator j = sound_prehash.find(temp.c_str()); //assert(j != sound_prehash.end()); if (j->second.num_inserted == 0 && j->second.size != 1) { j->second.d.list = &*sound_prehash_list_buf.begin() + p; p += j->second.size; } if (j->second.size == 1) { j->second.d.single = final_hash[i]; } else { j->second.d.list[j->second.num_inserted] = final_hash[i]; } ++j->second.num_inserted; } } data_head.word_block_size = round_up(out.tell() - start + 1l, page_size); data_head.total_block_size = data_head.word_block_size; advance_file(out, data_head.head_size + data_head.total_block_size); // Writting final hash out.write(reinterpret_cast<const char *>(&final_hash.front()), final_hash.size() * 4); data_head.word_count = word_hash.size(); data_head.word_buckets = word_hash.bucket_count(); data_head.word_size = round_up(word_hash.bucket_count() * 4, page_size); data_head.total_block_size += data_head.word_size; advance_file(out, data_head.head_size + data_head.total_block_size); } if (use_soundslike) { sound_prehash.resize(sound_prehash.size()*4/5); vector<u32int> final_hash(sound_prehash.bucket_count(), u32int_max); long int start = out.tell(); // // Writting soundslike words, creating soundslike Final Hash // for (unsigned int i = 0; i != sound_prehash.vector().size(); ++i) { const SoundHash::value_type & value = sound_prehash.vector()[i]; if (sound_prehash.parms().is_nonexistent(value)) continue; u16int count = value.second.size; if (count == 1) { out.write(reinterpret_cast<const char *>(&value.second.d.single), 4); } else { out.write(reinterpret_cast<const char *>(value.second.d.list), count * 4); } out.write(reinterpret_cast<char *>(&count),2); final_hash[i] = out.tell() - start; out << value.first << '\0'; advance_file(out, round_up(out.tell(), 4)); } data_head.soundslike_block_size = round_up(out.tell() - start, page_size); data_head.total_block_size += data_head.soundslike_block_size; // Witting Final soundslike Hash advance_file(out, data_head.head_size + data_head.total_block_size); out.write(reinterpret_cast<char *>(&final_hash.front()), final_hash.size() * 4); data_head.soundslike_count = sound_prehash.size(); data_head.soundslike_buckets = sound_prehash.bucket_count(); data_head.soundslike_size = round_up(final_hash.size() * 4, page_size); data_head.total_block_size += data_head.soundslike_size; } advance_file(out, data_head.head_size + data_head.total_block_size); // write data head to file out.restart(); out.write((char *)&data_head, sizeof(DataHead)); out.write(lang.name(), data_head.lang_name_size); out.write(lang.soundslike_name(), data_head.soundslike_name_size); out.write(lang.soundslike_version(), data_head.soundslike_version_size); out.write(mid_chars, data_head.middle_chars_size); return no_err; }}namespace aspeller { PosibErr<void> create_default_readonly_word_set(StringEnumeration * els, Config & config) { Language lang; RET_ON_ERR(lang.setup("",&config)); aspeller_default_readonly_ws::create(config.retrieve("master-path"), els,lang); return no_err; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -