📄 chm.cpp

📁 It is a chm file viewer lib with source code
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
bool chm::chmfile::read (const string& path, std::vector<char>& out) const{  chmistream in(*this, path);  if ( !in ) return false;  out.resize (in.read_left());  in.read (&out[0], out.size());  return true;}bool chm::chmfile::read (const string& path, char *buf, size_t buf_len) const{  chmistream in(*this, path);  if ( !in ) return false;  in.read (buf, buf_len);  return true;}namespace {  struct readdir_str {    std::map<string, int> found;    std::list<string> *entries;    string path;    int flags;  };  // this chmlib enumeration is cimpletely screwed up  int chm_readdir (struct chmFile *, struct chmUnitInfo *ui, void *context)  {    readdir_str& ctx = *(readdir_str *)context;    std::list<string> &entries = *ctx.entries;    string add = ui->path;    add = add.substr(ctx.path.size());    string::iterator it = find(add.begin(), add.end(), '/');    if ( it != add.end() ) {        if ( !(ctx.flags & CHM_ENUMERATE_DIRS) )            return CHM_ENUMERATOR_CONTINUE;        add.erase (++it, add.end());    } else if ( !(ctx.flags & CHM_ENUMERATE_FILES) ) {        return CHM_ENUMERATOR_CONTINUE;    }    if ( !ctx.found[add]++ )        entries.push_back (add);    return CHM_ENUMERATOR_CONTINUE;  }}// the chmlib-s enumeration seems to be a bit broken and is built// not very nicely for doing things like this// for some archives i didn't get the whole directory treebool chm::chmfile::readdir (const string& path, std::list<string>& entries, int type ) const{  int flags = 0;  flags |= (type&files) ? CHM_ENUMERATE_FILES : 0;  flags |= (type&dirs) ? CHM_ENUMERATE_DIRS : 0;  flags |= (type&special) ? CHM_ENUMERATE_SPECIAL : 0;  flags |= (type&meta) ? CHM_ENUMERATE_META : 0;  flags |= CHM_ENUMERATE_NORMAL;  readdir_str str;  str.entries = &entries;  str.path = path;  str.flags = flags;  if ( !path.empty() && str.path[str.path.size() - 1] != '/' ) str.path += "/";  chm_enumerate_dir (chm, path.c_str(), flags, chm_readdir, &str);  return true;}void chm::chmfile::close (){  chm_close (chm);  chm = NULL;  delete tree;  tree = NULL;}chm::chmfile::~chmfile (){  if ( is_open () ) close ();}namespace {  template<class T> void destroy (T* obj) { delete obj; }}chm::chm_topics_tree::~chm_topics_tree (){  std::for_each (children.begin(), children.end(), destroy<chm_topics_tree>);}chm::chmistream::chmistream (const chmfile& chm, const std::string& path, size_t buf_size) : std::istream (NULL){  buf = chm.open (path, buf_size);  init (buf);  release = true;}chm::chmistream::~chmistream (){  if ( release ) delete buf;}std::streamsize chm::chmistream::read_left () const{  return ((chmstreambuf *)buf)->read_left ();}namespace {  struct ss_hdr {    unsigned char dis;    unsigned char dir;    unsigned char ccs;    unsigned char ccr;    unsigned char lcs;    unsigned char lcr;  };  // should make something more efficient later  int icmp (const vector<char>& l, const vector<char>&r) {      string ls(l.begin(), l.end());      string rs(r.begin(), r.end());      transform (ls.begin(), ls.end(), ls.begin(), (int(*)(int))std::tolower);      transform (rs.begin(), rs.end(), rs.begin(), (int(*)(int))std::tolower);//      if ( ls > rs ) { std::cerr << "ls > rs: , ls = [" << ls << "], rs = [" << rs << "]" << std::endl; }      if ( ls < rs ) return -1;      else if ( ls > rs ) return 1;      else return 0;  }}bool chm::chmfile::search_index (const std::string& text, std::list<chm_search_match>& found,    bool whole_words, bool titles_only) const{  chmistream imain(*this, "/$FIftiMain");  chmistream iwlc(*this, "/$FIftiMain");  chmistream itopics(*this, "/#TOPICS");  chmistream istrings(*this, "/#STRINGS");  chmistream iurltbl(*this, "/#URLTBL");  chmistream iurlstr(*this, "/#URLSTR");  vector<char> search_text(text.begin(), text.end());  if ( !imain || !itopics || !istrings || !iurltbl || !iurlstr ) return false;  char hdr[0x32];  if ( !imain.read(hdr, 0x32) ) return false;  ss_hdr s;  memcpy (&s, hdr + 0x1e, 6);  // scales are always two, nobody ever seen other scales  // or has credible info how it would work, so scales are kinda pointless  if ( s.dis != 2 || s.ccs != 2 || s.lcs != 2 ) return false;  imain.seekg(0x14);  long node_offset = imain.get_dword();  long tree_depth = imain.get_word();  imain.seekg(0x2e);  long node_len = imain.get_dword();//  std::cerr << "no: " << node_offset << " nl: " << node_len << " td: " << tree_depth << std::endl;  vector<char> rw(256);  vector<char> key;  while ( node_offset ) {      if ( tree_depth ) tree_depth--;      imain.seekg (node_offset);      if ( imain.read_left() < node_len ) return false;      long end;      if ( tree_depth ) { // we're at index node          long free_space = imain.get_word();          end = node_offset + node_len - free_space;      } else {          long next_offset = imain.get_dword(); // offset          imain.get_word(); // unknown          long free_space = imain.get_word();          end = node_offset + node_len - free_space;          node_offset = next_offset;      }      while ( imain.tellg() < end ) { // read as many entries as there are in this node          size_t word_len = imain.get() - 1;          size_t pos = imain.get();          rw.resize (word_len);          imain.read(&rw[0], word_len);          if ( pos && key.size() >= pos ) {              key.resize(pos);              key.insert (key.end(), rw.begin(), rw.end());          } else {              key = rw;          }//          std::cerr << " key = " << string(key.begin(), key.end()) << std::endl;          if ( tree_depth ) { // index node              long next_offset = imain.get_dword();//              std::cerr << "HMM, depth = " << tree_depth << ", next offset = " << next_offset << std::endl;              imain.get_word();              if ( icmp (search_text, key) < 0 ) {                  node_offset = next_offset;                  break;              }              continue;          }          // now we have to deal with leaf node          int is_title = imain.get(); // 0 = body tag, 1 = title tag          size_t wlc_count = imain.get_encint();          size_t wlc_offset = imain.get_dword();          imain.get_word(); // unknown          imain.get_encint(); // wlc_size//          std::cerr << "Processing to selecting and filtering and comparing.. " << std::endl;          if ( !is_title && titles_only || key.size() < search_text.size() ) continue; // we only want titles          int cmp;          if ( whole_words ) {              cmp = icmp (search_text, key);          } else {              cmp = icmp (search_text, vector<char>(key.begin(), key.begin() + search_text.size()));          }          if ( cmp < 0 ) {//              std::cerr << "THE END!" << std::endl;              node_offset = 0;              break;          }          if ( cmp != 0 ) continue; // better luck next time          found.push_back(chm_search_match());          chm_search_match &match = found.back();          match.is_title = is_title;          match.key = string(key.begin(), key.end());          match.documents.reserve (wlc_count);          iwlc.seekg (wlc_offset);          size_t document_index = 0;          for ( size_t i = 0; i < wlc_count; i++ ) {              int pos = 0;              document_index += iwlc.get_sr (s.dis, s.dir, pos);              match.documents.push_back(chm_search_document());              chm_search_document &doc = match.documents.back();              itopics.seekg(document_index * 16);              itopics.get_dword(); // offset to #TOCIDX              int title_offset = itopics.get_dword (); // offset to #STRINGS for title or -1              int urltbl_offset = itopics.get_dword (); // offset to #URLTBL              iurltbl.seekg (urltbl_offset);              iurltbl.get_dword(); // unknown              iurltbl.get_dword(); // index in #TOPICS              int urlstr_offset = iurltbl.get_dword();              iurlstr.seekg(urlstr_offset + 8);              std::getline (iurlstr, doc.path, '\0');              doc.path = "/" + doc.path;              if ( title_offset != -1 ) {                  istrings.seekg(title_offset);                  std::getline (istrings, doc.title, '\0');              }              size_t count = iwlc.get_sr(s.ccs, s.ccr, pos);              doc.offsets.reserve(count);              for ( size_t j = 0; j < count; j++ ) {                  size_t where = iwlc.get_sr(s.lcs, s.lcr, pos);                  doc.offsets.push_back(where);              }              iwlc.get_sr_finish(pos);          }      }  }  return true;}bool chm::chmfile::cache (const string& path){  if ( cache_data.find(path) != cache_data.end() ) return false; // already in the cache  chmistream in(*this, path);  if ( !in ) return false;  vector<char>& v = cache_data[path];  v.resize (in.read_left());  in.read(&v[0], v.size());  return true;}void chm::chmfile::cache_search_database (){  cache("/$FIftiMain");  cache("/#TOPICS");  cache("/#STRINGS");  cache("/#URLTBL");  cache("/#URLSTR");}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -