📄 gist_amdb.cc

📁 Libgist is an implementation of the Generalized Search Tree, a template index structure that makes i
💻 CC
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
// gist_amdb.cc// Copyright (c) 1997, 1998, Regents of the University of California// $Id: gist_amdb.cc,v 1.12 2000/03/15 00:24:24 mashah Exp $// VCPORT_B#ifdef WIN32#pragma warning(disable:4786) // Templates can cause names to get too long for							  // debug information. Disables this warning.#endif// VCPORT_E#include <math.h>#include <stdlib.h>#include <stdio.h>#include "gist_compat.h"	// for MAXDOUBLE#include "gist_defs.h"#include "gist_ext.h"#include "gist_cursor.h"#include "gist_cursorext.h"#include "gist_disppredcursor.h"#include "gist_ustk.h"#include "amdb_wkldprofile.h"#include "amdb_treemap.h"#include "amdb_ext.h"#include "gist.h"// STL// VCPORT_B#if (defined WIN32) || (__GNUG__==3)#include <iostream>#include <fstream>#include <vector>#include <algorithm>using namespace std;#else#include <iostream.h>#include <fstream.h>#include <vector.h>#include <algo.h>#endif// VCPORT_Eamdb_ext_t* amdb_ext_t::amdb_ext_list[gist_ext_t::gist_numext];// debugvoid pclust(amdb_clustering::Clustering* clust, int beg, int end){    if (end == 0 || end >= clust->size()) {        end = clust->size() - 1;    }    amdb_clustering::ClusterInfoVect::iterator it = clust->info.begin() + beg;    int i;    for (i = beg; i <= end; i++, it++) {        cout << i << " [i: " << it->itemNo << "  c: " << it->clusterNo;	cout << "] ";    }    cout << endl; // flush to see on screen}/////////////////////////////////////////////////////////////////////////// amdb_ext_t - Constructor//// Description://	- registers amdb extension in extension list///////////////////////////////////////////////////////////////////////////amdb_ext_t::amdb_ext_t(    gist_ext_t::gist_ext_ids 	id,    PenaltyFct 			p,    DisplayFct 			d,    DisplayPredsFct 		dp,    ConsistentFct		consistent)    : id(id), penalty(p), display(d), displayPreds(dp), consistent(consistent){    amdb_ext_list[id] = this;}/////////////////////////////////////////////////////////////////////////// gist::_optBreakNotify - _build_level inserted a page break//// Description://	- _build_level inserted a page break after 'itemCnt' items//	  that is not part of our clustering, we must now update the clustering//	- increase cluster numbers for all items starting from itemCnt+1 (in //	  optClustering) by 1/////////////////////////////////////////////////////////////////////////voidgist::_optBreakNotify(    int		itemCnt,    void*	param){    _OptTreeBuildState* state = (_OptTreeBuildState *) param;    amdb_clustering::ClusterInfoVect::iterator it;    for (it = state->optClustering->info.begin() + itemCnt;        it != state->optClustering->info.end(); it++) {	it->clusterNo++;    }}/////////////////////////////////////////////////////////////////////////// gist::_constructBpMap - construct treemap for BP file stream//// Description://	- read bpStream from beginning to end and record all BPs//	  in map (only read the recorded key lengths and skip the keys)/////////////////////////////////////////////////////////////////////////voidgist::_constructBpMap(    ifstream&		bpStream,    amdb_treemap&	bpMap){    bpStream.seekg(0); // start from beginning    int itemCnt = 0;    for (;;) {        streampos pos = bpStream.tellg(); // position of next BP	int klen;// VCPORT_B#ifdef WIN32	bpStream.read((char *) &klen, sizeof(klen));#else	bpStream.read((void *) &klen, sizeof(klen));#endif// VCPORT_E	if (bpStream.fail()) {	    // we seeked past EOF: we're done	    break;	}	int size = gist_p::rec_size(klen, sizeof(shpid_t));	bpMap.addFileItem(pos, size);	itemCnt++;	// skip to next BP	bpStream.seekg(klen + sizeof(shpid_t), ios::cur);    }    bpStream.clear(); // reset state for scan in _optBpStream()    bpStream.seekg(0); // ... and go back to beginning}/////////////////////////////////////////////////////////////////////////// gist::_initLeafBuildState - initialize state for _optItemStream()//// Description://	- sort the optimal clustering we obtain from the profile in//	  ascending cluster number order//	- our leafItemMap is also our optimal clustering, since we're //	  dealing with leaf-level items/////////////////////////////////////////////////////////////////////////voidgist::_initLeafBuildState(    const amdb_clustering::Clustering* optClustering, // in    const amdb_treemap* tmap, // in    _OptTreeBuildState&	    state) // out{    state.level = 0;    state.leafItemMap = new amdb_clustering::Clustering();    state.leafItemMap->copy(*optClustering);    state.tupMap = tmap;    state.deallocMap = false;    state.optClustering = state.leafItemMap;    state.optClustering->sortClusterNo();}/////////////////////////////////////////////////////////////////////////// gist::_OptTreeBuildState::update - update leafItemMap with optClustering//// Description://	- after building an internal level, we need to update leafItemMap//	  to reflect the clustering at the level just build, which //	  is the input for the next level//	- also, delete tupMap and optClustering///////////////////////////////////////////////////////////////////////////voidgist::_OptTreeBuildState::update(){    if (level == 0) {	// nothing to update or deallocate for leaf level        return;    }    // we just built an internal level:    // update leafItemMap with the clustering of this    // level    optClustering->sortItemNo();	// the itemNo of optClustering is the current clusterNo of leafItemMap    amdb_clustering::ClusterInfoVect::iterator it;    for (it = leafItemMap->info.begin(); it != leafItemMap->info.end(); it++) {	if (it->clusterNo != amdb_clustering::invalidNo) {	    it->clusterNo = (*optClustering)[it->clusterNo].clusterNo;	}    }    // clean up stuff from previous level    clear();}/////////////////////////////////////////////////////////////////////////// gist::_initInternalBuildState - initialize state for _optBpStream()//// Description://	- if the previous level was the leaf level, sort leafItemMap on//	  the itemNo//	- produce an optimal clustering of the items of this level,//	  which is sorted on the clusterNo and stored in optClustering///////////////////////////////////////////////////////////////////////////rc_tgist::_initInternalBuildState(    _OptTreeBuildState&	    state){    if (state.level == 0) {        // we just finished the leaf level, for which we sorted leafItemMap	// on the clusterNo; we go back to the itemNo ordering now in order	// to allow us to project the query result sets	state.leafItemMap->sortItemNo();    }    state.level++;    amdb_clustering::ResultSet* resultSets = NULL;    state.profile->projectToClusters(*state.leafItemMap, resultSets);    amdb_treemap* bpMap = new amdb_treemap();    _constructBpMap(*state.bpStream, *bpMap);    // cluster BPs according to traversal patterns in workload (equal-sized     // partitions)    amdb_clustering::Clustering* optClustering = new amdb_clustering::Clustering();    int numRetrieved, numClusters;    W_DO(amdb_clustering::optClustering(resultSets, state.profile->queries.size(),         state.fillFactor, 0, 10, 1, *bpMap, *optClustering, numClusters, numRetrieved));    // we must traverse every BP    assert(numRetrieved == bpMap->itemMap.size());    // sort the optimal clustering on the cluster number, we return    // tuples in that order    if (state.optClustering != state.leafItemMap) {	// we allocated this in a previous call        delete state.optClustering;    }    state.optClustering = optClustering;    state.optClustering->sortClusterNo();    state.tupMap = bpMap;    state.deallocMap = true;    // don't need resultSets anymore    amdb_clustering::freeResultSets(resultSets, state.profile->queries.size());    return (RCOK);}/////////////////////////////////////////////////////////////////////////// gist::_optBpStream - returns BPs in order of optimal clustering//// Description://// Return Values://      RCOK/////////////////////////////////////////////////////////////////////////rc_tgist::_optBpStream(    void* 	key,    int&	klen,    void* 	data,    int&	dlen,    shpid_t&	child,    void* 	param){    _OptTreeBuildState* state = (_OptTreeBuildState *) param;    if (state->nextItem == state->optClustering->size()) {        // no more tuples left, we're done	return (eEOF);    }    const amdb_clustering::ClusterInfo* cinfo = &(*state->optClustering)[state->nextItem];    // see if we crossed a page boundary    if (state->nextItem > 0) {        if ((*state->optClustering)[state->nextItem - 1].clusterNo != cinfo->clusterNo) {	    if (!state->returnedBreak) {		// we crossed a page boundary and haven't returned a page break to 		// _build_level yet; we'll do that now (without advancing nextItem)		state->returnedBreak = true;		klen = 0;		return(RCOK);	    } else {		// we already returned the break, we must now return the next tuple		state->returnedBreak = false;	    }	}    }    // fetch next item from bpStream    state->nextItem++;    const amdb_treemap::RecInfo *info =        state->tupMap->itemInfo(cinfo->itemNo);    ifstream& s = *state->bpStream;    s.seekg(info->loc.fileLoc, ios::beg);// VCPORT_B#ifdef WIN32	s.read((char *) &klen, sizeof(klen));#else    s.read((void *) &klen, sizeof(klen));#endif// VCPORT_E    if (s.bad()) {	// bad: we seeked past EOF        return(eFILEERROR);    }// VCPORT_B#ifdef WIN32	s.read((char *)key, klen);#else	s.read(key, klen);#endif// VCPORT_E        dlen = 0;// VCPORT_B#ifdef WIN32    s.read((char *) &child, sizeof(child));#else    s.read((void *) &child, sizeof(child));#endif// VCPORT_E    if (!(s.eof() || s.good())) {        return(eFILEERROR);    }    return(RCOK);}voidtstr(ifstream& s, streampos loc){    int x;    streampos oldpos = s.tellg();    s.seekg(loc);// VCPORT_B#ifdef WIN32	s.read((char *) &x, sizeof(x));#else    s.read((void *) &x, sizeof(x));#endif// VCPORT_E    streampos pos = s.tellg();    cout << x << endl;    s.seekg(oldpos);}/////////////////////////////////////////////////////////////////////////// gist::_optItemStream - returns items in order of optimal clustering//// Description://	- optClustering of profile might contain non-retrieved items (clusterNo//	  == invalidNo), we need to skip those//// Return Values://      RCOK: with klen == 0: force page break, klen != 0: return tuple//	eEOF: no more tuples/////////////////////////////////////////////////////////////////////////rc_tgist::_optItemStream(    void* 	key,    int&	klen,    void* 	data,    int&	dlen,    shpid_t&	child,    void* 	param){    _OptTreeBuildState* state = (_OptTreeBuildState *) param;    if (state->nextItem == state->leafItemMap->size()) {
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -