📄 docvectors.h
字号:
/*
  Text Clustering
  Copyright (C) 2004 Debora "Barbara" Donato, Antonio Gulli

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef DOCVECTOR
#define DOCVECTOR 1

//#define DEBUG_VECTOR 1

#include <vector>
#include <algorithm>
#include <functional>
#include <queue>
#include <map>
#include <iostream>
#include <math.h>

// NOTE(review): kept because code including this header presumably relies on
// it. It makes the unqualified name `tuple` ambiguous with std::tuple outside
// of class tuple, which is why the rest of this header spells it `::tuple`.
using namespace std;

typedef unsigned int vect_coordinate;

/*
 * tuple: one (coordinate, value) entry of a sparse vector.
 */
class tuple {
public:
    /** Builds the entry (0, 0). */
    tuple() : coordinate(0), value(0) {}

    /**
     * Builds the entry (c, v).
     *
     * @param c coordinate; converted to vect_coordinate (unsigned)
     * @param v value stored at that coordinate
     */
    tuple(int c, float v) : coordinate(static_cast<vect_coordinate>(c)), value(v) {}

    /** @return the coordinate of this entry */
    inline vect_coordinate getCoordinate() const { return coordinate; }

    /** @return the value of this entry */
    inline float getValue() const { return value; }

    /**
     * Overwrites both fields.
     *
     * @param c new coordinate
     * @param v new value
     * @return a copy of the updated entry (returned by value, not by reference)
     */
    inline tuple setTuple(vect_coordinate c, float v) {
        coordinate = c;
        value = v;
        return *this;
    }

    /** @return a pointer to this entry */
    inline tuple* getTuple() { return this; }

    /**
     * Prints an entry on cout as "c:<coordinate> v:<value>".
     * Note that it prints the argument t, not the object it is called on.
     *
     * @param t the entry to print
     */
    void show(const tuple& t) {
        cout << "c:" << t.coordinate << " v:" << t.value;
    }

    /** Streams an entry as "c:<coordinate> v:<value>". */
    friend ostream& operator<<(ostream& os, const tuple& t) {
        os << "c:" << t.coordinate << " v:" << t.value;
        return os;
    }

private:
    vect_coordinate coordinate;
    float value;
};

// `::tuple` (not `tuple`): see the note on the using-directive above.
typedef vector< ::tuple > DOCUMENT;

/*
 * document: a sparse vector of tuples.
 *
 * Coordinates can first be counted in a map (addHash) and then turned into
 * the vector with HashToVector, or tuples can be appended directly with add.
 */
class document {
private:
    DOCUMENT v;                            // the sparse vector of tuples
    map<vect_coordinate, unsigned int> m;  // per-coordinate counters, filled before the vector is built

public:
    /**
     * Builds a document holding a copy of the given tuples.
     * Intentionally not explicit, so existing implicit conversions from
     * DOCUMENT keep compiling.
     *
     * @param vett the tuples to copy
     */
    document(const DOCUMENT& vett) : v(vett) {}

    /** Builds an empty document. */
    document() {}

    // No user-declared destructor: the members clean up after themselves, and
    // leaving it out lets the compiler generate move operations.

    /** @return iterator to the first tuple */
    inline DOCUMENT::iterator begin() { return v.begin(); }

    /** @return iterator one past the last tuple */
    inline DOCUMENT::iterator end() { return v.end(); }

    /**
     * addHash: increments the counter associated with a coordinate
     * (the counter starts from 0 the first time a coordinate is seen).
     *
     * @param c the coordinate to count
     */
    inline void addHash(vect_coordinate c) { m[c]++; }

    /**
     * clearHash: empties the counter map.
     */
    inline void clearHash() { m.clear(); }

    /**
     * HashToVector: appends one tuple (coordinate, count) per map entry to
     * the vector, then empties the map.
     *
     * The vector is not cleared first, so it is expected to be empty when
     * this is called. The map is visited in increasing key order, so the
     * appended tuples are sorted by coordinate.
     */
    inline void HashToVector(void) {
        ::tuple t;
        for (map<vect_coordinate, unsigned int>::iterator it = m.begin();
             it != m.end(); ++it) {
            t.setTuple(it->first, it->second);
            add(&t);
#ifdef DEBUG_VECTOR
            cout << "map scan c:" << it->first << " v:" << it->second << endl;
#endif
        }
        m.clear();
    }

    /**
     * add: appends a copy of a tuple to the vector.
     *
     * @param t pointer to the tuple to append; it is dereferenced without a
     *          null check
     */
    inline void add(::tuple* t) { v.push_back(*t); }

    /**
     * add: appends a tuple to the vector.
     *
     * @param t the tuple to append
     */
    inline void add(::tuple t) { v.push_back(t); }

    /**
     * clear: empties the vector (the counter map is left untouched).
     */
    inline void clear() { v.clear(); }

    /**
     * size: number of tuples in the vector.
     *
     * @return the number of stored tuples
     */
    inline unsigned int size() const { return static_cast<unsigned int>(v.size()); }

    /**
     * distance: compute the distance between this document and another one.
     * Only declared here; the definition is not part of this header.
     *
     * @param document_y the other document
     * @return the distance
     */
    double distance(document &document_y);

    /**
     * less_tuple: orders tuples by increasing coordinate.
     *
     * The argument/result typedefs are declared by hand because
     * std::binary_function, which used to supply them, was deprecated in
     * C++11 and removed in C++17.
     */
    struct less_tuple {
        typedef ::tuple first_argument_type;
        typedef ::tuple second_argument_type;
        typedef bool result_type;

        bool operator()(const ::tuple& x, const ::tuple& y) const {
            return x.getCoordinate() < y.getCoordinate();
        }
    };

    /**
     * sortVector: sorts the tuples by increasing coordinate.
     */
    inline void sortVector() {
        std::sort(v.begin(), v.end(), less_tuple());
    }

    /**
     * Streams a header line followed by one tuple per line.
     */
    friend ostream& operator<<(ostream& os, document& d) {
        // NOTE(review): in the copy this was restored from, the literal below
        // contained a raw line break before its closing quote; it is written
        // here as \n. Confirm against the upstream file.
        os << "Vector ... \n" << endl;
        for (DOCUMENT::iterator it = d.begin(); it != d.end(); ++it) {
            os << " " << (*it) << " " << endl;
        }
        os << endl;
        return os;
    }

    /**
     * printVector: prints one tuple per line on cout.
     */
    void printVector() {
        for (DOCUMENT::iterator doc_iter = v.begin(); doc_iter != v.end(); ++doc_iter) {
            cout << " " << (*doc_iter) << " " << endl;
        }
    }
};

/*
 * vectorSpace: a collection of documents, using a sparse representation
 *
 *   V = [(coordinate_1, value_1), ...., (coordinate_r, value_r)]
 *
 * NOTE: ALL VECTORS ARE ASSUMED TO BE SORTED FOR EFFICIENT
 *       DISTANCE AND CENTROID COMPUTATION
 */
class vectorSpace {
private:
    vector<document> vs;  // the vector space is a vector of document(s)

public:
    /** Builds an empty vector space. */
    vectorSpace() {}

    /**
     * at: access the vector space.
     *
     * @param i index of the document
     * @return a copy of the i-th document; vector::at throws
     *         std::out_of_range when i is not a valid index
     */
    inline document at(vect_coordinate i) { return vs.at(i); }

    /** @return iterator to the first document */
    inline vector<document>::iterator begin() { return vs.begin(); }

    /** @return iterator one past the last document */
    inline vector<document>::iterator end() { return vs.end(); }

    /**
     * preAllocate: reserve room for some documents.
     *
     * @param dim number of documents to reserve capacity for
     */
    void preAllocate(int dim) { vs.reserve(dim); }

    /**
     * pushVector: append a document to the vector space.
     *
     * @param d the document to append
     */
    void pushVector(document d) { vs.push_back(d); }

    /**
     * size: number of documents in the vector space.
     *
     * @return size of the vector space
     */
    int size() const { return static_cast<int>(vs.size()); }

    /**
     * centroid: presumably computes the centroid of the selected documents;
     * only declared here, the definition is not part of this header.
     *
     * @param indexes  the indexes of the documents to use
     * @param centroid output parameter, passed by reference
     */
    void centroid(vector<int> indexes, document &centroid);

    /**
     * select_k_random_vectors: only declared here; the definition is not
     * part of this header.
     *
     * @param k number of vectors
     * @return the k indexes
     */
    vector<vect_coordinate> select_k_random_vectors(unsigned int k);
};

#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -