⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prefixspan.cpp

📁 经典的数据关联挖掘算法prefixspan
💻 CPP
字号:
/* PrefixSpan: An efficient algorithm for sequential pattern mining $Id: prefixspan.cpp,v 1.3 2002/01/07 01:30:04 taku-ku Exp $; Copyright (C) 2002 Taku Kudo  All rights reserved. This is free software with ABSOLUTELY NO WARRANTY. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA*/#include <iostream>#include <map>#include <vector>#include <string>#include <string.h>#include <unistd.h>#include <stdlib.h>using namespace std;class string2string {public:   string operator() (const string &str) const { return str; };};class string2int {public:   unsigned int operator() (const string &str) const { return atoi (str.c_str()); };};template <class T = unsigned int, class F = string2int> class PrefixSpan {private:  vector < vector <T> >             transaction;  vector < pair <T, unsigned int> > pattern;  T init;  unsigned int minsup;  unsigned int minpat;  string delimiter;  bool verbose;  ostream *os;  void split (const string &src, 	      const string& key, 	      vector <T>& result)  {    result.clear();    int len =  src.size();    int i = 0; int si = 0;    while(i < len) {      while (i < len && key.find(src[i]) != string::npos) { si++; i++; };      while (i < len && key.find(src[i]) == string::npos) i++;      result.push_back(F()(src.substr(si,i-si)));      si = i;    }  }  void project (T opat, vector <pair <unsigned int, int> > &projected)  {    if (verbose)       cerr << "## projected by pattern [" << opat << "], " 	   << projected.size() << " instances\n";    map <T, vector <pair <unsigned int, int> > > cnt;      for (unsigned int i = 0; i < projected.size(); i++) {      int pos = projected[i].second;      unsigned int id  = projected[i].first;      unsigned int size = transaction[id].size();      map <T, int> tmp;      for (unsigned int j = pos + 1; j < size; j++) {	T item = transaction[id][j];	if (tmp.find (item) == tmp.end()) tmp[item] = j ;      }      for (map <T, int>::iterator k = tmp.begin(); k != tmp.end(); ++k) 	cnt[k->first].push_back (make_pair <unsigned int, int> (id, k->second));    }    if (verbose)       cerr << "## " << cnt.size() << " items found\n";    for (map <T, vector <pair <unsigned int, int> > >::iterator l = cnt.begin (); 	 l != cnt.end (); ) {      if (l->second.size() < minsup) {	map <T, vector <pair <unsigned int, int> > >::iterator tmp = l;	tmp = l;	++tmp;	cnt.erase (l);	l = tmp;      } else {	++l;      }    }    if (verbose)       cerr << "## pruned to " << cnt.size() << " items found\n";    if (cnt.size () == 0) {       if (minpat <= pattern.size()) {	  for (unsigned int i = 0; i < pattern.size(); i++)	    *os << (i ? " " : "") << pattern[i].first << delimiter << pattern[i].second;	  *os << endl;       }      return;    }    for (map <T, vector <pair <unsigned int, int> > >::iterator l = cnt.begin (); 	 l != cnt.end(); ++l) {      pattern.push_back (make_pair <T, unsigned int> (l->first, l->second.size()));      project (l->first, l->second);      pattern.erase (pattern.end());    }  }public:  PrefixSpan (T _init,	      unsigned int _minsup = 1, 	      unsigned int _minpat = 1, 	      string _delimiter = "/",	      bool _verbose = false):    init (_init), minsup(_minsup), minpat (_minpat), delimiter (_delimiter), verbose (_verbose) {};  ~PrefixSpan () {};  istream &read (istream &is)   {    string line;    vector <T> tmp;    while (getline (is, line)) {      split (line, " ", tmp);      transaction.push_back (tmp);    }    return is;  }  void run (ostream &_os)  {    os = &_os;    vector <pair <unsigned int, int> > root;    for (unsigned int i = 0; i < transaction.size(); i++)       root.push_back (make_pair (i, -1));    project (init, root);   }  void clear ()  {    transaction.clear ();    pattern.clear ();  }};int main (int argc, char **argv){  extern char *optarg;  bool verbose = false;  unsigned int minsup = 1;  unsigned int minpat = 1;  string delimiter = "/";  bool useint = true;  int opt;  while ((opt = getopt(argc, argv, "vsM:m:d:")) != -1) {    switch(opt) {    case 'v':      verbose = true;      break;    case 'm':      minsup = atoi (optarg);      break;    case 'M':      minpat = atoi (optarg);      break;    case 's':      useint = false;      break;    case 'd':       delimiter = string (optarg);       break;    default:      cout << "Usage: " << argv[0] << " [-m minsup] [-M minpat] [-v] [-s] [-d delimiter] < data .." << endl;      return -1;    }  }  if (useint) {    PrefixSpan<unsigned int, string2int> prefixspan (0, minsup, minpat, delimiter, verbose);    prefixspan.read (cin);    prefixspan.run  (cout);  } else {    PrefixSpan<string, string2string> prefixspan ("", minsup, minpat, delimiter, verbose);     prefixspan.read (cin);    prefixspan.run  (cout);  }  return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -