⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 docvectors.cpp

📁 聚类分析程序 k-means 编译环境 gcc/stl
💻 CPP
字号:
/*    Text Clustering  Copyright (C) 2004 Debora "Barbara" Donato, Antonio Gulli  This library is free software; you can redistribute it and/or modify it   under the terms of the GNU Lesser General Public License as published by   the Free Software Foundation; either version 2.1 of the License, or   (at your option) any later version.  This library is distributed in the hope that it will be useful, but   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public   License for more details.  You should have received a copy of the GNU Lesser General Public License   along with this library; if not, write to the Free Software Foundation, Inc.,   59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */#include "docvectors.h" /* * class used for for heap opearation */class triple { public:  triple(DOCUMENT::iterator iter, DOCUMENT::iterator endIter, tuple * tp) :     it(iter), end(endIter), tuplePTR(tp){};  ~triple(){};  inline tuple * getTuple() const { return tuplePTR; }  DOCUMENT::iterator getIterator() const { return it; }  DOCUMENT::iterator endIterator() const { return end; }  bool operator< (const triple &a) const{ // operator for heap cmp    return   a.tuplePTR->getCoordinate() < this->tuplePTR->getCoordinate();  }  friend ostream& operator<<(ostream& os, const triple& t){        os << " tuple:" << *(t.tuplePTR)  << endl;    return os;  }private:  DOCUMENT::iterator it;       // the current vector iterator on list  DOCUMENT::iterator end;      // the end of current list  tuple * tuplePTR;            // a pointer to the associated tuple};  /**   * distance: compute the distance between two documents   * @param document_y, document to which we are computing distance   * @return The distance  */double document::distance(document &document_y){  DOCUMENT::iterator itFirstDoc = begin();  DOCUMENT::iterator itSecondDoc = document_y.begin();    float diff = 0.0;    while (itFirstDoc != end() && 	   itSecondDoc != document_y.end()){    #ifdef DEBUG_VECTOR    cout << "tuple(doc_x):" << *itFirstDoc << endl;    cout << "tuple(doc_y):" << *itSecondDoc << endl;#endif          if (itFirstDoc->getCoordinate() < itSecondDoc->getCoordinate()){            diff += (itFirstDoc->getValue() * itFirstDoc->getValue());      itFirstDoc++;    } else if (itFirstDoc->getCoordinate() > itSecondDoc->getCoordinate()){      diff += (itSecondDoc->getValue() * itSecondDoc->getValue());      itSecondDoc++;    } else {      	diff += (itFirstDoc->getValue() - itSecondDoc->getValue()) * 	  (itFirstDoc->getValue() - itSecondDoc->getValue());	itFirstDoc++;	itSecondDoc++;    }  }  while (itSecondDoc != document_y.end()){        diff += (itSecondDoc->getValue() * itSecondDoc->getValue());    itSecondDoc++;   }        while(itFirstDoc != end()){    diff += (itFirstDoc->getValue() * itFirstDoc->getValue());    itFirstDoc++;  }  diff = sqrt(diff);  return diff;}  /** * centroid: compute the distance between two documents * @param vector<int> indexes, the indexes of vectors * @param DOCUMENT The vector of centroid return */void vectorSpace::centroid(vector<int> indexes,  document &centroid){    vector<int>::iterator it;             // an iterator on indexes  priority_queue<triple> Q;             // a min-priority que  int last_coordinate, new_coordinate;  // last accessed coor    float feature;                        // a single feature value  tuple t;                              // a generic tuple  DOCUMENT::iterator dit, eit;          // a generic iterator  /***************************************************************************/  /*    For each document in the index, a triple is inserted in the heap     */  /***************************************************************************/      for (it = indexes.begin(); it != indexes.end(); it++){ // for all documents#ifdef DEBUG_VECTOR          cout << "tuple: " << *(vs.at(*it).begin()->getTuple()) << endl;#endif    Q.push(triple(		  vs.at(*it).begin(),             // the head of vector pointed by *it		  vs.at(*it).end(),               // the tail of vector pointed by *it		  vs.at(*it).begin()->getTuple()  // the pointer to the tuple for this vector		  ));  }      /***************************************************************************/  /*    Pick the first tuple of the first document in the heap.              */  /***************************************************************************/  last_coordinate = Q.top().getTuple()->getCoordinate(); // extract min from priority Q  feature = Q.top().getTuple()->getValue();              // the current feature#ifdef DEBUG_VECTOR          cout << "last_coordinate:" << last_coordinate << " v:" << feature << endl;#endif  dit = Q.top().getIterator();                           // current position iterator  eit = Q.top().endIterator();                           // end iterator  dit++;                                                 // go to the next#ifdef DEBUG_VECTOR          cout << "next tuple " << *dit << endl;#endif    Q.pop();                                               // pop from the list     if (dit != eit){                                     // not reached the end of the list      Q.push(triple(		    dit,                                 // the next tuple iterator		    eit,                                 // the end of this list		    dit->getTuple()                      // the pointer to the tuple		    ));    }    /***************************************************************************/  /*    For each other triple in the heap:                                   */  /*        if (same coordinate) accumulate feature values                   */  /*          else add the current coordinate to the centroid                */  /***************************************************************************/  while (!Q.empty()){                           if ( (new_coordinate = Q.top().getTuple()->getCoordinate()) 	 == last_coordinate){ // same old coordinate      feature += Q.top().getTuple()->getValue();           // accumulate value#ifdef DEBUG_VECTOR            cout << "c:" <<  Q.top().getTuple()->getCoordinate() << " v:"<< Q.top().getTuple()->getValue() <<endl;      cout << "\tsame last_coordinate:" << last_coordinate << " accumulated feature:" << feature << endl;#endif    } else {                                               // a change in coordinate      centroid.add(t.setTuple(last_coordinate,             // create the new tuple;				feature / 			      (float) indexes.size()));    // a new centroid's coordinate;#ifdef DEBUG_VECTOR            cout << "\tadd to centroid " << t << endl;#endif      last_coordinate = new_coordinate;                    // extract min from priority Q      feature = Q.top().getTuple()->getValue();    }    dit = Q.top().getIterator();                           // current position iterator    eit = Q.top().endIterator();                           // end iterator    dit++;                                                             // go to the next#ifdef DEBUG_VECTOR          cout << "next tuple " << *dit << endl;#endif    Q.pop();                                               // pop from the list     if (dit != eit){                                       // not reached the end of the list	Q.push(triple(		      dit,                                 // the next tuple iterator		      eit,                                 // the end of this list		      dit->getTuple()                      // the pointer to the tuple		      ));    }  }  centroid.add(t.setTuple(last_coordinate,             // create the new tuple;				feature / 			      (float) indexes.size()));    // a new centroid's coordinate;}  /**   * select_k_random_vector:    * @param unsigned int k, number of vectors   * @return The array of k indexes   */vector<vect_coordinate> vectorSpace::select_k_random_vectors(unsigned int k){  vector<vect_coordinate> v;  vect_coordinate n = vs.size();  vect_coordinate i, r;  map <vect_coordinate, bool> generated;  map <vect_coordinate, bool>::iterator it;  bool ok;  double randNum;    if (k > n){                                      // vector space is too small.    for (i = 0; i < n; i++){ v.push_back(i); }    return v;  }  srand(time(NULL));  for (i = n - k +1; i <= n ; i++){        ok = false;      do {	randNum = rand() / (RAND_MAX + 1.0);	r =  (int) (n * randNum);      // generate a random number in [0,n)	//cout << "for vs.size()=" << n << " and k=" << k << " generated r=" << r << " rand=" << randNum;	it = generated.find(r);                        // already generated ?	if (it == generated.end()){                    // not!	  generated.insert(make_pair(r, true));        //  now yes	  v.push_back(r);                              // save this coordinate	  //cout << " saved";	  ok = true;	}	cout << endl;      } while (!ok);                                   // try another  }  return v;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -