📄 k-mean-clusters.h

📁 聚类分析程序 k-means 编译环境 gcc/stl
💻 H
字号:
/*    Text Clustering  Copyright (C) 2004 Debora "Barbara" Donato, Antonio Gulli  This library is free software; you can redistribute it and/or modify it   under the terms of the GNU Lesser General Public License as published by   the Free Software Foundation; either version 2.1 of the License, or   (at your option) any later version.  This library is distributed in the hope that it will be useful, but   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public   License for more details.  You should have received a copy of the GNU Lesser General Public License   along with this library; if not, write to the Free Software Foundation, Inc.,   59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */#ifndef KMEANCLUSTERS#define KMEANCLUSTERS 1#include "Clustering.h"#include "docvectors.h"#include "dictionary.h"using namespace std;#include <string>/* * class DocumentDistanceMap *    Note:  -- used for rapid document (address) update *           -- mantain the distance from each document (to the centroid, or whatever) *           */ typedef map<document *, double> DOC_DIST_MAP;class DocumentsDistancesMap{                      private:  DOC_DIST_MAP m;       // contain the association document distance (for a cluster)public:  typedef std::pair<document *, double> doc_distance_t;  // needed to mantain a pair  inline bool insert(document * dptr, double d){     //m[dptr] = d;                                // insert return a pair<iterator, bool>    return (m.insert(make_pair(dptr,d))).second;  // second is a bool, true if inserted / false o.w.  }  inline bool find(document * dptr, double& value){    DOC_DIST_MAP::iterator it = m.find(dptr);    if (it != m.end()) { value=it->second; return true;}    else { return false; }  }  inline bool find(document * dptr){    DOC_DIST_MAP::iterator it = m.find(dptr);    if (it != m.end()) { return true;}    else { return false; }  }  inline bool remove(document * dptr){    DOC_DIST_MAP::iterator it = m.find(dptr);    if (it != m.end()) { m.erase(it); return true;}    else { return false; }  }  inline int size() {return m.size(); }  inline DOC_DIST_MAP::iterator begin(){ return m.begin();}  inline DOC_DIST_MAP::iterator end(){ return m.end();}};/* * class kmClusters used to store the k clusters. *    Note:  -- a cluster is a centroid and an docdistance obj (a map) *           -- clusters can overlap *           */ class kmCluster {   private:  document centroid;                // the cluster centroid  DocumentsDistancesMap documents;       // the indexes of documents in this clusters public:  inline void setCentroid(const document& c) { centroid.clear(); centroid = c;} // set centroid  inline document&  getCentroid(void) { return centroid;}    // get centroid    inline bool addDocument(document*  docPTR) { return documents.insert(docPTR, 0.0); }  inline bool addDocument(document*  docPTR, double dist) { return documents.insert(docPTR, dist); }  inline bool removeDocument(document* docPTR) {return documents.remove(docPTR); }  inline bool isThereDocument(document* docPTR) { return documents.find(docPTR); }  inline double getDistanceFromCentroid(document * docPTR) {   // distance from centroid    double dist; documents.find(docPTR, dist); return dist; }    inline unsigned int size(void) { return documents.size(); }  // size of the cluster  void do_centroid(void);            // compute the centroid };class KMEANS : public Clustering{ private:  dictionary * d;                             // the dictionary  vectorSpace vs;                             // the vector space  document a_doc;                             // an auxiliar document public:  KMEANS() : d(new simpleDictionary) {};  ~KMEANS();  void initDataSet(void) {docID = 0;}  /************************************************************   *   * loadInDataSet: was abstract it load a string in dataset   *   ************************************************************/ void loadInDataSet(string& str){   dict_id id_word;   if (! (id_word = d->get(str))){       // not in lexicon     id_word = d->add(str);              // add it   }    a_doc.addHash(id_word); } /*********************************************************************   *   * loadInDataSet: was abstract it concludes the loading of a document   *   ********************************************************************/ void finalizeLoadingDocument(void){         a_doc.HashToVector();                 // vectorize the document   vs.pushVector(a_doc);                 // keep vector space populated   cout << a_doc;       a_doc.clear(); } void do_clustering(void);               // lets start to cluster // determines the k hash functions (for shingling, here it is a dummy) void initialize(void) {};};#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -