⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 seq_vat.h

📁 这是一个用于数据挖掘的常用算法的模板库(数据挖掘的C++模板库for UNIX)
💻 H
字号:
/* *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute *  Written by parimi@cs.rpi.edu *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu * *  This program is free software; you can redistribute it and/or *  modify it under the terms of the GNU General Public License *  as published by the Free Software Foundation; either version 2 *  of the License, or (at your option) any later version. * *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. * *  You should have received a copy of the GNU General Public License along *  with this program; if not, write to the Free Software Foundation, Inc., *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */#ifndef _SEQ_VAT_H_#define _SEQ_VAT_H_#include "pattern.h"#include "seq_instance.h"#include "generic_classes.h"#include "typedefs.h"/*** \brief SEQUENCE VAT calss for embedded mining by partial specialization of the generic VAT class. * * In this partial specialization, PP is fixed to directed, acyclic, indegree_lte_one,  * outdegree_lte_one (Tree property), MP is fixed to Fk X Fk, vert_mine, embedded (Vertical mining with Fk X Fk for embedded patterns), ST is the VAT storage type. */template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >ostream& operator<< (ostream& ostr, const vat<SEQ_PROP, V_Fkk_MINE_PROP, ALLOC, VAT_ST>& svat);template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >class vat<SEQ_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, VAT_ST> {public:    typedef pattern_support<V_Fkk_EMB_MINE_PROP> PAT_SUP;  typedef vat<SEQ_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, VAT_ST> VAT;  typedef seq_instance<V_Fkk_EMB_MINE_PROP> INSTANCE;  typedef VAT_ST<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >, ALLOC<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > > > IDLIST_T;  typedef typename IDLIST_T::const_iterator CONST_IT;  typedef typename IDLIST_T::iterator IT;    typedef VAT_ST<INSTANCE, ALLOC<INSTANCE> > INSTANCES;  typedef typename INSTANCES::const_iterator CONST_INST_IT;/**< constant iterator over instances in a tid */        void* operator new(size_t size) {      ALLOC<VAT> va;      return va.allocate(size);    }          void  operator delete(void *p, size_t size) {      if (p) {        ALLOC<VAT> va;        va.deallocate(static_cast<VAT*> (p), size);      }    }        inline IT begin() {return _idlist.begin();}    inline CONST_IT begin() const {return _idlist.begin();}    inline IT end() {return _idlist.end();}    inline CONST_IT end() const {return _idlist.end();}        bool empty() const {return _idlist.empty;}    int size() const{return _idlist.size();}        void clear() {_idlist.clear();}        void push_back(pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > const& inst) {      _idlist.push_back(inst);    }        // Return the size of the Vat in bytes      unsigned long int byte_size() const{      unsigned long int  b_size=0;      CONST_IT it;      for (it = begin(); it!=end();++it){        b_size+=(2+it->second.size())*sizeof(int);      }      return b_size;    }        //serializing a VAT to an output stream    void write_file(ostream & output_file) {      int ITSZ=sizeof(int);      ostringstream output;      CONST_IT it;      CONST_INST_IT it_inst;      int tid,inst_sz,ts;      for (it=begin();it!=end();it++){        tid=it->first;        inst_sz=it->second.size();        output.write(reinterpret_cast<const char *>(&tid), ITSZ);        output.write(reinterpret_cast<const char *>(&inst_sz), ITSZ);        for (it_inst =it->second.begin(); it_inst!=it->second.end(); ++it_inst){          ts=it_inst->get_ts();            output.write(reinterpret_cast<const char *>(&ts), ITSZ);        } //for it_inst      }//it      output_file.write(output.str().c_str(), output.str().size());    } //end write_file            //deserialize a vat from an input stream     void read_file (istream & input, unsigned long int size) {      //cout<<"reading from the the file"<<endl;      int ITSZ=sizeof(int);      int buf_size=size/ITSZ;         int *buf = new int[buf_size];      input.read((char *)buf, (size));       int current=0;      while(current<buf_size){        int tid=buf[current++];        int inst_sz=buf[current++];        int inst_end=current+inst_sz;        INSTANCES new_tidlist;        while(current <inst_end){          new_tidlist.push_back(INSTANCE(buf[current++]));        }        _idlist.push_back(make_pair(tid, new_tidlist));      }            input.clear();      delete [] buf;          } //read_file        /**      * Method performs intersection of two VATs. Returns the new candidate VATs.     * Like the join of the patterns, a VAT intersection can result in more than one new candidate VAT.     * But for itemsets, just one candidate VAT is generated.     */    template<typename PATTERN>      static VAT** intersection(const VAT* const& vat_i, const VAT* const& vat_j, PAT_SUP** cand_sups, PATTERN**, bool& is_l2) {                VAT** cand_vats = new VAT*[2]; // Max of 2 candidates possible for sequence extension.                // Determine how many candidate vats.        // If P->A join P->A then results in only one candidate vat.        bool self_join_only = (cand_sups[1] == 0);                   cand_vats[0]=new VAT;         if(!self_join_only)          cand_vats[1]=new VAT;                CONST_IT it_i=vat_i->begin(), it_j=vat_j->begin();                if(it_i == vat_i->end() || it_j == vat_j->end())          return cand_vats;                while(it_i != vat_i->end() && it_j != vat_j->end()) {          if(it_i->first < it_j->first) {            it_i++;            continue;          }          if(it_i->first > it_j->first) {            it_j++;            continue;          }          //execution reaches this point only if both TIDs are equal          INSTANCES seqi_inst, seqj_inst;          bool seqi_ind = false, seqj_ind = false, seqi_emb = false, seqj_emb = false;                    //intersect the instances for this tid          INSTANCE::seq_join_seq(it_i->second, it_j->second, seqi_inst, seqj_inst, seqi_ind,                          seqj_ind, seqi_emb, seqj_emb, self_join_only);                    if(!seqi_inst.empty())            cand_vats[0]->push_back(make_pair(it_i->first, seqi_inst));                    if(!seqj_inst.empty() && !self_join_only)            cand_vats[1]->push_back(make_pair(it_i->first, seqj_inst));                    it_i++;          it_j++;                  } //end while (it_i &* it_j)                cand_sups[0]->set_sup(make_pair(cand_vats[0]->size(), cand_vats[0]->size()));        if(!self_join_only)          cand_sups[1]->set_sup(make_pair(cand_vats[1]->size(), cand_vats[1]->size()));                return cand_vats;      }    friend ostream& operator<< <>(ostream&, const VAT&);private:      IDLIST_T _idlist;};template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >ostream& operator<< (ostream& ostr, const vat<SEQ_PROP, V_Fkk_MINE_PROP, ALLOC, VAT_ST>& svat) {  typedef vat<SEQ_PROP, V_Fkk_MINE_PROP, ALLOC, VAT_ST> VAT;  typename VAT::CONST_IT it=svat.begin();    while(it!=svat.end()) {    ostr<<it->first<<" ";    typename VAT::CONST_INST_IT inst_it=it->second.begin();    while(inst_it!=it->second.end())      ostr<<(*inst_it++)<<" ";    ostr<<endl;    it++;  }    return ostr;}//end friend operator<</*** \brief SEQUENCE VAT calss for induced mining by partial specialization of the generic VAT class. * * In this partial specialization, PP is fixed to directed, acyclic, indegree_lte_one,  * outdegree_lte_one (Tree property), MP is fixed to Fk X Fk, vert_mine, induced(Vertical mining with Fk X Fk for induced patterns), ST is the VAT storage type. */template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >class vat<SEQ_PROP, V_Fkk_IND_MINE_PROP, ALLOC, VAT_ST> {public:    typedef pattern_support<V_Fkk_IND_MINE_PROP> PAT_SUP;  typedef vat<SEQ_PROP, V_Fkk_IND_MINE_PROP, ALLOC, VAT_ST> VAT;  typedef seq_instance<V_Fkk_IND_MINE_PROP> INSTANCE;  typedef VAT_ST<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >, ALLOC<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > > > IDLIST_T;  typedef typename IDLIST_T::const_iterator CONST_IT;  typedef typename IDLIST_T::iterator IT;  typedef typename IDLIST_T::reverse_iterator RIT;    typedef VAT_ST<INSTANCE, ALLOC<INSTANCE> > INSTANCES;  typedef typename INSTANCES::const_iterator CONST_INST_IT;/**< constant iterator over instances in a tid */            void* operator new(size_t size) {      ALLOC<VAT> va;      return va.allocate(size);    }        void  operator delete(void *p, size_t size) {      if (p) {        ALLOC<VAT> va;        va.deallocate(static_cast<VAT*> (p), size);      }    }        IT begin() {return _idlist.begin();}    CONST_IT begin() const {return _idlist.begin();}    IT end() {return _idlist.end();}    CONST_IT end() const {return _idlist.end();}    bool empty() const {return _idlist.empty;}    int size() const {return _idlist.size();}    void clear() {_idlist.clear();}    void push_back(pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > const& inst) {      _idlist.push_back(inst);    }        // Return the size of the Vat in bytes      unsigned long int byte_size() const{      unsigned long int  b_size=0;      CONST_IT it;      for (it = begin(); it!=end();++it){        b_size+=(2+it->second.size()*2)*sizeof(int);      }      return b_size;    }        //serializing a VAT to an output stream    void write_file(ostream & output_file) {      int ITSZ=sizeof(int);      ostringstream output;      CONST_IT it;      CONST_INST_IT it_inst;      int tid,inst_sz,induced,pos;      for (it=begin();it!=end();it++){        tid=it->first;        inst_sz=it->second.size();        output.write(reinterpret_cast<const char *>(&tid), ITSZ);        output.write(reinterpret_cast<const char *>(&inst_sz), ITSZ);        for (it_inst =it->second.begin(); it_inst!=it->second.end(); ++it_inst){          induced=(int)it_inst->get_induced();          pos=it_inst->get_pos();            output.write(reinterpret_cast<const char *>(&pos), ITSZ);          output.write(reinterpret_cast<const char *>(&induced), ITSZ);        } //for it_inst      }//it      output_file.write(output.str().c_str(), output.str().size());    } //end write_file        //deserialize a vat from an input stream     void read_file (istream & input, unsigned long int size) {      //cout<<"reading from the the file"<<endl;      int ITSZ=sizeof(int);      int buf_size=size/ITSZ;         int *buf = new int[buf_size];      input.read((char *)buf, (size));       int current=0;      while(current<buf_size){        int tid=buf[current++];        int inst_sz=buf[current++];        int inst_end=current+inst_sz*2;        INSTANCES new_tidlist;        while(current <inst_end){          new_tidlist.push_back(INSTANCE(buf[current], (buf[current+1])?true:false));          current= current+2;        }        _idlist.push_back(make_pair(tid, new_tidlist));      }            input.clear();      delete [] buf;          } //read_file            /**      * Method performs intersection of two VATs. Returns the new candidate VATs.     * Like the join of the patterns, a VAT intersection can result in more than one new candidate VAT.     * But for itemsets, just one candidate VAT is generated.     */    template<typename PATTERN>    static VAT** intersection(const VAT* const& vat_i, const VAT* const& vat_j, PAT_SUP** cand_sups, PATTERN**, bool& is_l2) {      VAT** cand_vats = new VAT*[2]; // Max of 2 candidates possible for sequence extension.                       // Determine how many candidate vats.                       // If P->A join P->A then results in only one candidate vat.      bool skip0 = (cand_sups[0] == 0);      bool skip1 = (cand_sups[1] == 0);         cand_vats[0] = 0;      cand_vats[1] = 0;      if (!skip0)	cand_vats[0]=new VAT;       if(!skip1)        cand_vats[1]=new VAT;            if (skip0 && skip1){	return cand_vats;      }            CONST_IT it_i=vat_i->begin(), it_j=vat_j->begin();            if(it_i==vat_i->end() || it_j==vat_j->end())        return cand_vats;            while(it_i!=vat_i->end() && it_j!=vat_j->end()) {        if(it_i->first<it_j->first) {          it_i++;          continue;        }        if(it_i->first>it_j->first) {          it_j++;          continue;        }        //execution reaches this point only if both TIDs are equal        INSTANCES seqi_inst, seqj_inst;        bool seqi_ind = false, seqj_ind = false, seqi_emb = false, seqj_emb = false;                //intersect the instances this tid        INSTANCE::seq_join_seq(it_i->second, it_j->second, seqi_inst, seqj_inst, seqi_ind, 			       seqj_ind, seqi_emb, seqj_emb, skip1, skip0);        	if (!skip0){	  if(seqi_ind)	    cand_sups[0]->incr_isup();	  // else 	  if(seqi_emb)	    cand_sups[0]->incr_esup();	}	        if(!skip1) {          if(seqj_ind)            cand_sups[1]->incr_isup();          // else          if(seqj_emb)            cand_sups[1]->incr_esup();        }                if(!seqi_inst.empty() && !skip0)          cand_vats[0]->push_back(make_pair(it_i->first, seqi_inst));                if(!seqj_inst.empty() && !skip1)          cand_vats[1]->push_back(make_pair(it_i->first, seqj_inst));                it_i++;        it_j++;              } //end while (it_i &* it_j)            //cout << "ISECT I : " << *vat_i << endl;      //cout << "ISECT J : " << *vat_j << endl;      //if (!skip0)      //cout << "CAND A : " << *cand_vats[0] << endl;        //if (!skip1)      //  cout << "CAND B: " << *cand_vats[1] << endl;              return cand_vats;    }    friend ostream& operator<< <>(ostream&, const VAT&);private:      IDLIST_T _idlist;};#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -