📄 gist_amdb.cc
字号:
// gist_amdb.cc// Copyright (c) 1997, 1998, Regents of the University of California// $Id: gist_amdb.cc,v 1.12 2000/03/15 00:24:24 mashah Exp $// VCPORT_B#ifdef WIN32#pragma warning(disable:4786) // Templates can cause names to get too long for // debug information. Disables this warning.#endif// VCPORT_E#include <math.h>#include <stdlib.h>#include <stdio.h>#include "gist_compat.h" // for MAXDOUBLE#include "gist_defs.h"#include "gist_ext.h"#include "gist_cursor.h"#include "gist_cursorext.h"#include "gist_disppredcursor.h"#include "gist_ustk.h"#include "amdb_wkldprofile.h"#include "amdb_treemap.h"#include "amdb_ext.h"#include "gist.h"// STL// VCPORT_B#if (defined WIN32) || (__GNUG__==3)#include <iostream>#include <fstream>#include <vector>#include <algorithm>using namespace std;#else#include <iostream.h>#include <fstream.h>#include <vector.h>#include <algo.h>#endif// VCPORT_Eamdb_ext_t* amdb_ext_t::amdb_ext_list[gist_ext_t::gist_numext];// debugvoid pclust(amdb_clustering::Clustering* clust, int beg, int end){ if (end == 0 || end >= clust->size()) { end = clust->size() - 1; } amdb_clustering::ClusterInfoVect::iterator it = clust->info.begin() + beg; int i; for (i = beg; i <= end; i++, it++) { cout << i << " [i: " << it->itemNo << " c: " << it->clusterNo; cout << "] "; } cout << endl; // flush to see on screen}/////////////////////////////////////////////////////////////////////////// amdb_ext_t - Constructor//// Description:// - registers amdb extension in extension list///////////////////////////////////////////////////////////////////////////amdb_ext_t::amdb_ext_t( gist_ext_t::gist_ext_ids id, PenaltyFct p, DisplayFct d, DisplayPredsFct dp, ConsistentFct consistent) : id(id), penalty(p), display(d), displayPreds(dp), consistent(consistent){ amdb_ext_list[id] = this;}/////////////////////////////////////////////////////////////////////////// gist::_optBreakNotify - _build_level inserted a page break//// Description:// - _build_level inserted a page break after 'itemCnt' items// that is not part of our clustering, we must now update the clustering// - increase cluster numbers for all items starting from itemCnt+1 (in // optClustering) by 1/////////////////////////////////////////////////////////////////////////voidgist::_optBreakNotify( int itemCnt, void* param){ _OptTreeBuildState* state = (_OptTreeBuildState *) param; amdb_clustering::ClusterInfoVect::iterator it; for (it = state->optClustering->info.begin() + itemCnt; it != state->optClustering->info.end(); it++) { it->clusterNo++; }}/////////////////////////////////////////////////////////////////////////// gist::_constructBpMap - construct treemap for BP file stream//// Description:// - read bpStream from beginning to end and record all BPs// in map (only read the recorded key lengths and skip the keys)/////////////////////////////////////////////////////////////////////////voidgist::_constructBpMap( ifstream& bpStream, amdb_treemap& bpMap){ bpStream.seekg(0); // start from beginning int itemCnt = 0; for (;;) { streampos pos = bpStream.tellg(); // position of next BP int klen;// VCPORT_B#ifdef WIN32 bpStream.read((char *) &klen, sizeof(klen));#else bpStream.read((void *) &klen, sizeof(klen));#endif// VCPORT_E if (bpStream.fail()) { // we seeked past EOF: we're done break; } int size = gist_p::rec_size(klen, sizeof(shpid_t)); bpMap.addFileItem(pos, size); itemCnt++; // skip to next BP bpStream.seekg(klen + sizeof(shpid_t), ios::cur); } bpStream.clear(); // reset state for scan in _optBpStream() bpStream.seekg(0); // ... and go back to beginning}/////////////////////////////////////////////////////////////////////////// gist::_initLeafBuildState - initialize state for _optItemStream()//// Description:// - sort the optimal clustering we obtain from the profile in// ascending cluster number order// - our leafItemMap is also our optimal clustering, since we're // dealing with leaf-level items/////////////////////////////////////////////////////////////////////////voidgist::_initLeafBuildState( const amdb_clustering::Clustering* optClustering, // in const amdb_treemap* tmap, // in _OptTreeBuildState& state) // out{ state.level = 0; state.leafItemMap = new amdb_clustering::Clustering(); state.leafItemMap->copy(*optClustering); state.tupMap = tmap; state.deallocMap = false; state.optClustering = state.leafItemMap; state.optClustering->sortClusterNo();}/////////////////////////////////////////////////////////////////////////// gist::_OptTreeBuildState::update - update leafItemMap with optClustering//// Description:// - after building an internal level, we need to update leafItemMap// to reflect the clustering at the level just build, which // is the input for the next level// - also, delete tupMap and optClustering///////////////////////////////////////////////////////////////////////////voidgist::_OptTreeBuildState::update(){ if (level == 0) { // nothing to update or deallocate for leaf level return; } // we just built an internal level: // update leafItemMap with the clustering of this // level optClustering->sortItemNo(); // the itemNo of optClustering is the current clusterNo of leafItemMap amdb_clustering::ClusterInfoVect::iterator it; for (it = leafItemMap->info.begin(); it != leafItemMap->info.end(); it++) { if (it->clusterNo != amdb_clustering::invalidNo) { it->clusterNo = (*optClustering)[it->clusterNo].clusterNo; } } // clean up stuff from previous level clear();}/////////////////////////////////////////////////////////////////////////// gist::_initInternalBuildState - initialize state for _optBpStream()//// Description:// - if the previous level was the leaf level, sort leafItemMap on// the itemNo// - produce an optimal clustering of the items of this level,// which is sorted on the clusterNo and stored in optClustering///////////////////////////////////////////////////////////////////////////rc_tgist::_initInternalBuildState( _OptTreeBuildState& state){ if (state.level == 0) { // we just finished the leaf level, for which we sorted leafItemMap // on the clusterNo; we go back to the itemNo ordering now in order // to allow us to project the query result sets state.leafItemMap->sortItemNo(); } state.level++; amdb_clustering::ResultSet* resultSets = NULL; state.profile->projectToClusters(*state.leafItemMap, resultSets); amdb_treemap* bpMap = new amdb_treemap(); _constructBpMap(*state.bpStream, *bpMap); // cluster BPs according to traversal patterns in workload (equal-sized // partitions) amdb_clustering::Clustering* optClustering = new amdb_clustering::Clustering(); int numRetrieved, numClusters; W_DO(amdb_clustering::optClustering(resultSets, state.profile->queries.size(), state.fillFactor, 0, 10, 1, *bpMap, *optClustering, numClusters, numRetrieved)); // we must traverse every BP assert(numRetrieved == bpMap->itemMap.size()); // sort the optimal clustering on the cluster number, we return // tuples in that order if (state.optClustering != state.leafItemMap) { // we allocated this in a previous call delete state.optClustering; } state.optClustering = optClustering; state.optClustering->sortClusterNo(); state.tupMap = bpMap; state.deallocMap = true; // don't need resultSets anymore amdb_clustering::freeResultSets(resultSets, state.profile->queries.size()); return (RCOK);}/////////////////////////////////////////////////////////////////////////// gist::_optBpStream - returns BPs in order of optimal clustering//// Description://// Return Values:// RCOK/////////////////////////////////////////////////////////////////////////rc_tgist::_optBpStream( void* key, int& klen, void* data, int& dlen, shpid_t& child, void* param){ _OptTreeBuildState* state = (_OptTreeBuildState *) param; if (state->nextItem == state->optClustering->size()) { // no more tuples left, we're done return (eEOF); } const amdb_clustering::ClusterInfo* cinfo = &(*state->optClustering)[state->nextItem]; // see if we crossed a page boundary if (state->nextItem > 0) { if ((*state->optClustering)[state->nextItem - 1].clusterNo != cinfo->clusterNo) { if (!state->returnedBreak) { // we crossed a page boundary and haven't returned a page break to // _build_level yet; we'll do that now (without advancing nextItem) state->returnedBreak = true; klen = 0; return(RCOK); } else { // we already returned the break, we must now return the next tuple state->returnedBreak = false; } } } // fetch next item from bpStream state->nextItem++; const amdb_treemap::RecInfo *info = state->tupMap->itemInfo(cinfo->itemNo); ifstream& s = *state->bpStream; s.seekg(info->loc.fileLoc, ios::beg);// VCPORT_B#ifdef WIN32 s.read((char *) &klen, sizeof(klen));#else s.read((void *) &klen, sizeof(klen));#endif// VCPORT_E if (s.bad()) { // bad: we seeked past EOF return(eFILEERROR); }// VCPORT_B#ifdef WIN32 s.read((char *)key, klen);#else s.read(key, klen);#endif// VCPORT_E dlen = 0;// VCPORT_B#ifdef WIN32 s.read((char *) &child, sizeof(child));#else s.read((void *) &child, sizeof(child));#endif// VCPORT_E if (!(s.eof() || s.good())) { return(eFILEERROR); } return(RCOK);}voidtstr(ifstream& s, streampos loc){ int x; streampos oldpos = s.tellg(); s.seekg(loc);// VCPORT_B#ifdef WIN32 s.read((char *) &x, sizeof(x));#else s.read((void *) &x, sizeof(x));#endif// VCPORT_E streampos pos = s.tellg(); cout << x << endl; s.seekg(oldpos);}/////////////////////////////////////////////////////////////////////////// gist::_optItemStream - returns items in order of optimal clustering//// Description:// - optClustering of profile might contain non-retrieved items (clusterNo// == invalidNo), we need to skip those//// Return Values:// RCOK: with klen == 0: force page break, klen != 0: return tuple// eEOF: no more tuples/////////////////////////////////////////////////////////////////////////rc_tgist::_optItemStream( void* key, int& klen, void* data, int& dlen, shpid_t& child, void* param){ _OptTreeBuildState* state = (_OptTreeBuildState *) param; if (state->nextItem == state->leafItemMap->size()) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -