⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tree_tokenizer.h

📁 这是一个用于数据挖掘的常用算法的模板库(数据挖掘的C++模板库for UNIX)
💻 H
字号:
/* *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute *  Written by parimi@cs.rpi.edu *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu *  Modifications: *    Added tokenizer properties -- Zaki, 5/8/06 * *  This program is free software; you can redistribute it and/or *  modify it under the terms of the GNU General Public License *  as published by the Free Software Foundation; either version 2 *  of the License, or (at your option) any later version. * *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. * *  You should have received a copy of the GNU General Public License along *  with this program; if not, write to the Free Software Foundation, Inc., *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */// tokenizer for trees#ifndef _TREE_TOKENIZER_H#define _TREE_TOKENIZER_H#include "pattern.h"#include "pat_fam.h"#include "tree_vat.h"#include "element_parser.h"#include "generic_classes.h"#include "tokenizer_utils.h"/*** \brief Tree tokenizer class by partial specialization of the generic tokenizer class. * * the template argument is instantiated with a pattern that has directed, acyclic, indegree_lte_one pattern property(tree), * MINING_PROPS type of mining property, ST type of pattern storage and CC type of * canocial code. */template<typename PP, typename MP, typename TP, typename PAT_ST, template<typename, typename, typename, template <typename> class > class CC,template <typename> class ALLOC >class tokenizer<TREE_PATTERN, DMTL_TKNZ_PROP, ALLOC >{  public:  typedef vat<TREE_PROP, V_Fkk_MINE_PROP, ALLOC, std::vector> VAT;  typedef tree_instance<std::vector, PP > TREE_INSTANCE;      tokenizer(const int max=LINE_SZ): MAXLINE(max) {}      /**    * returns the TID of transaction read;   *   * parses one transaction from input database, and collects VATS in vat_hmap   * return value is -1 on end of stream   */  template<class SM_T>    int parse_next_trans(ifstream& infile, pat_fam<TREE_PATTERN>& freq_pats, storage_manager<TREE_PATTERN, VAT, ALLOC, SM_T>& vat_hmap) {      char* line=new char[MAXLINE];      char word[MAXLINE];      char* startline=line;      const int BK_TRK=-1; // backtrack symbol for trees            int len;      int count; //# of words parsed from line      int tid=-1;      int dfs_id=-1;      int num_items=3; //# of objects on this transaction      VAT* tvat;      TREE_PATTERN* p;      int depth; // maintains current depth in tree      vector<pair<typename TREE_PATTERN::VERTEX_T, pair<int, bool> > > obj_data;      // stores info for scope computation for generating level-1 vats      // the first int in 2nd pair is current scope for pattern      // bool is if the object is still in scope            typedef element_parser<typename TREE_PATTERN::VERTEX_T> V_P;            *line='\0';      infile.getline(line, MAXLINE-1);      len=strlen(line);      if(!len || !line) {        delete[] startline;        return -1;      }            line[len++]='\0';      count=0;      depth=0;      //obj_data.reserve(MAXLINE-1);            while(count<num_items+3 && line<(startline+len)) {        if(!(line=parse_word()(line, word))) {          //parse_word() failed          delete[] startline;          return -1;        }        count++;                switch(count) {          case 1:            //this is tid            tid=atoi(word); break;                      case 2:             //this is cid            break;                  case 3:            //this is # of elements on line            num_items=atoi(word);            break;                      default:            typename TREE_PATTERN::VERTEX_T v=el_prsr.parse_element(word);                        if(V_P::notEq(v, V_P::convert(BK_TRK))) {              dfs_id++;              depth++;              for(unsigned int i=0; i<obj_data.size(); i++)                if(obj_data[i].second.second)                  obj_data[i].second.first++;              obj_data.push_back(make_pair(v, make_pair(0, 1)));            }              else {                depth--;                for(unsigned int i=0; i<obj_data.size(); i++) {                  obj_data[i].second.first--;                  if(obj_data[i].second.second && (obj_data[i].second.first<0)) {                    // add this pattern & scope to VAT                    obj_data[i].second.second=0;                    p=new TREE_PATTERN;                    p->add_vertex(obj_data[i].first);                    p->init_canonical_code(obj_data[i].first);                                        if((tvat=vat_hmap.get_vat(p))) {                      // vat found, check if this tid occurs                      typename VAT::IT it=tvat->end()-1;                      if(tvat->size() && it->first==tid)                        it->second.push_back(TREE_INSTANCE(i, dfs_id, depth, 1));                      else {                        typename VAT::template ST<TREE_INSTANCE, ALLOC<TREE_INSTANCE> > vec_inst;                        vec_inst.push_back(TREE_INSTANCE(i, dfs_id, depth, 1));                        tvat->push_back(make_pair(tid, vec_inst));                      }                      delete p;                    }                    else {                      // vat not found, create new one and insert it                      tvat=new VAT;                      typename VAT::template ST<TREE_INSTANCE, ALLOC<TREE_INSTANCE> > vec_inst;                      vec_inst.push_back(TREE_INSTANCE(i, dfs_id, depth, 1));                      tvat->push_back(make_pair(tid, vec_inst));                      if(!vat_hmap.add_vat(p, tvat)) {                        cerr<<"tokenizer.get_length_one(tree): add_vat failed"<<endl;                        return -1;                      }                      freq_pats.push_back(p);                    }                                      }//end if obj_data..<0                }//end for i              }//end else v!=BK_TRK                      }//end switch              }//end while(count<..)            // special case: root of tree has not been added yet      if(!obj_data[0].second.second) {        cerr<<"tokenizer.get_length_one(tree): root of tree has invalid scope"<<endl;        return -1;      }            // add its pattern & scope to VAT      obj_data[0].second.second=0;      p=new TREE_PATTERN;      p->add_vertex(obj_data[0].first);      p->init_canonical_code(obj_data[0].first);            if((tvat=vat_hmap.get_vat(p))) {        // vat found, check if this tid occurs        typename VAT::IT it=tvat->end()-1;        if(tvat->size() && it->first==tid)          it->second.push_back(TREE_INSTANCE(0, dfs_id, 0, 1));        else {          typename VAT::template ST<TREE_INSTANCE, ALLOC<TREE_INSTANCE> > vec_inst;          vec_inst.push_back(TREE_INSTANCE(0, dfs_id, 0, 1));          tvat->push_back(make_pair(tid, vec_inst));        }        delete p;      }      else {        // vat not found, create new one and insert it        tvat=new VAT;        typename VAT::template ST<TREE_INSTANCE, ALLOC<TREE_INSTANCE> > vec_inst;        vec_inst.push_back(TREE_INSTANCE(0, dfs_id, 0, 1));        tvat->push_back(make_pair(tid, vec_inst));        if(!vat_hmap.add_vat(p, tvat)) {          cerr<<"tokenizer.get_length_one(tree): add_vat failed"<<endl;          return -1;        }        freq_pats.push_back(p);      }            //cout<<"tokenizer(trees).parse_next_trans exit"<<endl;            delete[] startline;      return tid;          }//end parse_next_trans()  private:    const int MAXLINE;  element_parser<typename TREE_PATTERN::VERTEX_T> el_prsr;  };//end class tokenizer for trees#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -