⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 docvectors.h

📁 聚类分析程序 k-means 编译环境 gcc/stl
💻 H
字号:
/*
 *  Text Clustering
 *
 *  Copyright (C) 2004 Debora "Barbara" Donato, Antonio Gulli
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation; either version 2.1 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 *  License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this library; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifndef DOCVECTOR
#define DOCVECTOR 1

//#define DEBUG_VECTOR 1

#include <vector>
#include <algorithm>
#include <functional>
#include <queue>
#include <map>
#include <iostream>
#include <math.h>

// Kept only so that existing files including this header keep compiling.
// Nothing below relies on it: every standard name is std::-qualified.
using namespace std;

typedef unsigned int vect_coordinate;

/*
 * A (coordinate, value) pair: one non-zero entry of a sparse vector.
 *
 * NOTE: because of the using-directive above, the unqualified name `tuple`
 * is ambiguous with std::tuple at namespace scope in C++11 and later.
 * Outside of this class body, always spell it `::tuple`.
 */
class tuple {
public:
  tuple() : coordinate(0), value(0) {}
  tuple(int c, float v) : coordinate(c), value(v) {}
  // No user-declared destructor: the implicit one is sufficient.

  vect_coordinate getCoordinate() const { return coordinate; }
  float getValue() const { return value; }

  /**
   * setTuple: overwrite both fields.
   * @return a copy of the updated tuple
   */
  tuple setTuple(vect_coordinate c, float v) {
    coordinate = c;
    value = v;
    return *this;
  }

  tuple *getTuple() { return this; }

  /**
   * show: print a tuple on std::cout.
   * Note that it prints the tuple passed as argument, not *this.
   */
  void show(const tuple &t) {
    std::cout << "c:" << t.coordinate << " v:" << t.value;
  }

  friend std::ostream &operator<<(std::ostream &os, const tuple &t) {
    os << "c:" << t.coordinate << " v:" << t.value;
    return os;
  }

private:
  vect_coordinate coordinate;
  float value;
};

// The space after '<' is deliberate: in C++98 "<::" would be lexed as the
// digraph "<:" (i.e. '[') followed by ':'.
typedef std::vector< ::tuple > DOCUMENT;

/*
 * A document: a sparse vector of tuples.
 */
class document {
private:
  DOCUMENT v;                                 // the sparse vector of tuples
  std::map<vect_coordinate, unsigned int> m;  // per-coordinate counters, filled
                                              // before the vector is created

public:
  document(DOCUMENT vett) : v(vett) {}
  document() {}
  // No user-declared destructor: leaving it implicit lets C++11 (and later)
  // compilers generate move operations, so a growing vector<document> can
  // move its elements instead of deep-copying them.

  DOCUMENT::iterator begin() { return v.begin(); }
  DOCUMENT::iterator end() { return v.end(); }
  DOCUMENT::const_iterator begin() const { return v.begin(); }
  DOCUMENT::const_iterator end() const { return v.end(); }

  /**
   * addHash: increment the counter associated with a coordinate
   *
   * @param c the coordinate (map key); created with value 0 if missing
   */
  void addHash(vect_coordinate c) { m[c]++; }

  /**
   * clearHash: clear the map
   */
  void clearHash() { m.clear(); }

  /**
   * HashToVector: transform the map into tuples and clear the map.
   *
   * One tuple (key, counter) is appended to the vector for every map entry.
   * The vector is NOT cleared first: call clear() beforehand if a fresh
   * vector is wanted.
   */
  void HashToVector(void) {
    ::tuple t;
    // std::map iterates in increasing key order, so the appended tuples
    // are sorted by coordinate.
    for (std::map<vect_coordinate, unsigned int>::const_iterator it = m.begin();
         it != m.end(); ++it) {
      t.setTuple(it->first, static_cast<float>(it->second));
      v.push_back(t);
#ifdef DEBUG_VECTOR
      std::cout << "map scan c:" << it->first << " v:" << it->second << std::endl;
#endif
    }
    m.clear();
  }

  /**
   * add: append a copy of a tuple to the vector
   *
   * @param t pointer to the tuple to be copied; must not be null
   */
  void add(::tuple *t) { v.push_back(*t); }

  /**
   * add: append a tuple to the vector
   */
  void add(::tuple t) { v.push_back(t); }

  /**
   * clear: clear the vector (the map is left untouched)
   */
  void clear() { v.clear(); }

  /**
   * size: number of tuples in the document
   */
  unsigned int size() const { return static_cast<unsigned int>(v.size()); }

  /**
   * distance: compute the distance between this document and another one
   * (defined outside of this header)
   *
   * @param document_y the other document
   * @return The distance
   */
  double distance(document &document_y);

  /**
   * Strict weak ordering of tuples by coordinate.
   *
   * The nested typedefs are the ones std::binary_function used to provide;
   * that base class was deprecated in C++11 and removed in C++17.
   */
  struct less_tuple {
    typedef ::tuple first_argument_type;
    typedef ::tuple second_argument_type;
    typedef bool result_type;

    bool operator()(const ::tuple &x, const ::tuple &y) const {
      return x.getCoordinate() < y.getCoordinate();
    }
  };

  /**
   * sortVector: sort the tuples by increasing coordinate.
   *             All vectors are assumed to be sorted by the rest of the code.
   */
  void sortVector() { std::sort(v.begin(), v.end(), less_tuple()); }

  friend std::ostream &operator<<(std::ostream &os, const document &d) {
    os << "Vector ... " << std::endl;
    for (DOCUMENT::const_iterator it = d.begin(); it != d.end(); ++it) {
      os << " " << (*it) << " " << std::endl;
    }
    os << std::endl;
    return os;
  }

  /**
   * printVector: print every tuple on std::cout, one per line
   */
  void printVector() const {
    for (DOCUMENT::const_iterator doc_iter = v.begin(); doc_iter != v.end();
         ++doc_iter) {
      std::cout << " " << (*doc_iter) << " " << std::endl;
    }
  }
};

/*
 * A vector space; a sparse representation is adopted:
 *
 *   V = [(coordinate_1, value_1), ...., (coordinate_r, value_r)]
 *
 *   NOTE: ALL VECTORS ARE ASSUMED TO BE SORTED FOR EFFICIENT
 *         DISTANCE AND CENTROID COMPUTATION
 */
class vectorSpace {
private:
  std::vector<document> vs;  // the vector space is a vector of document(s)

public:
  vectorSpace() {}

  /**
   * at: access the vector space
   *
   * @param i index of the document
   * @return a copy of the i-th document; std::vector::at throws
   *         std::out_of_range when i is not a valid index
   */
  document at(vect_coordinate i) const { return vs.at(i); }

  std::vector<document>::iterator begin() { return vs.begin(); }
  std::vector<document>::iterator end() { return vs.end(); }
  std::vector<document>::const_iterator begin() const { return vs.begin(); }
  std::vector<document>::const_iterator end() const { return vs.end(); }

  /**
   * preAllocate: reserve room for some documents
   *
   * @param dim number of documents; values <= 0 are ignored (a negative
   *            value would otherwise be converted to a huge unsigned size
   *            and make reserve() throw std::length_error)
   */
  void preAllocate(int dim) {
    if (dim > 0) {
      vs.reserve(static_cast<std::vector<document>::size_type>(dim));
    }
  }

  /**
   * pushVector: push a document in the vector space
   *
   * @param d the document's term vector
   */
  void pushVector(document d) { vs.push_back(d); }

  /**
   * size: return the vector space size
   *
   * @return number of documents in the vector space
   */
  int size() const { return static_cast<int>(vs.size()); }

  /**
   * centroid: compute the centroid of a set of documents
   * (defined outside of this header)
   *
   * @param indexes  the indexes of the vectors
   * @param centroid output: the centroid vector
   */
  void centroid(std::vector<int> indexes, document &centroid);

  /**
   * select_k_random_vectors:
   * (defined outside of this header)
   *
   * @param k number of vectors
   * @return The array of k indexes
   */
  std::vector<vect_coordinate> select_k_random_vectors(unsigned int k);
};

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -