📄 gist_amdb.cc
字号:
// no more tuples left, we're done return (eEOF); } // skip non-retrieved items (with clusterno == invalidNo == -1): // they precede all retrieved items in leafItemMap const amdb_clustering::ClusterInfo* cinfo; if (state->nextItem == 0) { do { cinfo = &(*state->leafItemMap)[state->nextItem]; state->nextItem++; } while (cinfo->clusterNo == amdb_clustering::invalidNo && state->nextItem < state->leafItemMap->size()); if (state->nextItem == state->leafItemMap->size()) { // there were no retrieved items return(eEOF); } // rewind nextItem to point to current item state->nextItem--; } else { // fetch the next item cinfo = &(*state->leafItemMap)[state->nextItem]; } // see if we crossed a page boundary (and the previous item wasn't skipped // because it has an invalid cluster number) if (state->nextItem > 0) { if ((*state->leafItemMap)[state->nextItem - 1].clusterNo != cinfo->clusterNo && (*state->leafItemMap)[state->nextItem - 1].clusterNo != amdb_clustering::invalidNo) { if (!state->returnedBreak) { // we crossed a page boundary and haven't returned a page break to // _build_level yet; we'll do that now (without advancing nextItem) state->returnedBreak = true; klen = 0; return (RCOK); } else { // we already returned the break, we must now return the next tuple state->returnedBreak = false; } } } // safe to advance nextItem state->nextItem++; // fetch next item from its leaf page const amdb_treemap::RecInfo *info = state->tupMap->itemInfo(cinfo->itemNo); gist_p page; W_DO(_static_fix_page(*state->file, page, info->loc.treeLoc.page, LATCH_SH)); // make a copy of the key vec_t tup(key, klen); state->ext->getKey(page, info->loc.treeLoc.slot, tup); if (tup.ptr(0) != key) { // getKey() changed tup to point to the page, we // need to make a copy (void) memcpy(key, tup.ptr(0), tup.len(0)); } klen = tup.len(0); // make a copy of the data const keyrec_t& rec = page.rec(info->loc.treeLoc.slot); (void) memcpy(data, rec.elem(), rec.elen()); dlen = rec.elen(); child = 0; _static_unfix_page(*state->file, page); return (RCOK);}/////////////////////////////////////////////////////////////////////////// gist::createOpt - create 'optimal' tree based on opt. clustering//// Description:// - bulk-load tree level by level, using the hypergraph clustering// to produce an ordered input stream// - when calling _build_level(), specify a fill factor of 1.0 regardless// of the actual fill factor (a lot of times, the hypergraph clustering// produces clusters that are just a bit large, and we don't want to break// those up)// - installs opt tree and translation map in profile//// Return Values:// RCOK/////////////////////////////////////////////////////////////////////////rc_tgist::createOpt( const char* fileName, // in float fillFactor, // in const amdb_wkldprofile& profile, // in const amdb_clustering::Clustering& optClustering, // in: hypergraph clustering const amdb_treemap& tmap, // in: our amdb_treemap Rid2ItemIdMap& ridMap) // out{ _OptTreeBuildState buildState; char* temp1 = "cotemp1"; // to be changed later char* temp2 = "cotemp2"; bool toTemp1 = true; // true: write BPs to temp1 // VCPORT_B#ifdef WIN32 ofstream bpOutStream(temp1, ios_base::out | ios_base::binary ); ofstream tmpfile(temp2, ios_base::out | ios_base::binary );#else ofstream bpOutStream(temp1); // creates empty file ofstream tmpfile(temp2); // creates empty file#endif tmpfile.close(); // This is just a precaution, in case tmpfile is used again in this function#ifdef WIN32 tmpfile.clear();#endif#ifdef WIN32 ifstream bpInStream(temp2, ios_base::in | ios_base::binary );#else ifstream bpInStream(temp2); // opens empty file#endif // VCPORT_E if (fillFactor > 1.0) fillFactor = 1.0; // can't fill page more than once gist* optTree = new gist(); // the one we're building // these don't change between levels buildState.profile = &profile; buildState.file = &_file; buildState.ext = _ext; buildState.fillFactor = fillFactor; buildState.bpStream = &bpInStream; // create the root (must be in page 1) rc_t status; if ((status = optTree->create(fileName, _ext)) != RCOK) { delete optTree; return(status); } int level = 0; for (;;) { level++; TupleStream tstream; if (level == 1) { // read the leaf items in opt clustering order tstream = _optItemStream; _initLeafBuildState(&optClustering, &tmap, buildState); } else { // read BPs in opt clustering order tstream = _optBpStream; W_DO(_initInternalBuildState(buildState)); } buildState.nextItem = 0; int pageCnt; gist_p lastPage; W_DO(optTree->_build_level(tstream, (void *) &buildState, 1.0, level, rootNo, bpOutStream, _optBreakNotify, pageCnt, lastPage)); // build rid map, now that the leaf level is build (and the exact // clustering known) if (level == 1) { amdb_clustering::ClusterInfoVect::iterator it; int slot = 0; int prevClusterNo = 0; for (it = buildState.leafItemMap->info.begin(); it != buildState.leafItemMap->info.end(); it++, slot++) { if (prevClusterNo != it->clusterNo) { // we crossed a cluster boundary, reset slot slot = 0; prevClusterNo = it->clusterNo; } Rid rid; rid.page = it->clusterNo + 2; // + 2: cluster 0 is on page 2 rid.slot = slot; ridMap[rid] = it->itemNo; } } // leave buildState intact, we need optClustering for the next level buildState.update(); // finished with streams for now bpOutStream.close(); bpInStream.close(); // VCPORT_B // Because the stream state doesn't get rest when file is closed.#ifdef WIN32 bpOutStream.clear(); bpInStream.clear();#endif // VCPORT_E if (pageCnt == 1) { // The last level we produced contains only 1 page: this // is the root level. We must copy the last page of this // level to the (fixed-location) root page, adjust the // level in the root page's header and then delete the // last page. gist_p rootPage; W_DO(optTree->_fix_page(rootPage, rootNo, LATCH_EX)); W_DO(lastPage.copy(rootPage)); rootPage.set_level(level); optTree->_unfix_page(rootPage); W_DO(optTree->_file.freePage(lastPage._descr)); lastPage._pp = NULL; if (unlink(temp1) != 0) return eFILEERROR; if (unlink(temp2) != 0) return eFILEERROR; optTree->close(); break; } else { // lastPage is still fixed optTree->_unfix_page(lastPage); // switch BP input and output for the next level toTemp1 = (toTemp1 ? false : true); if (toTemp1) { // write to temp1, read from temp2 // VCPORT_B#ifdef WIN32 bpOutStream.open(temp1, ios::out | ios::binary ); bpInStream.open(temp2, ios::in | ios::binary );#else bpOutStream.open(temp1); bpInStream.open(temp2);#endif } else {#ifdef WIN32 bpOutStream.open(temp2, ios::out | ios::binary ); bpInStream.open(temp1, ios::in | ios::binary );#else bpOutStream.open(temp2); bpInStream.open(temp1);#endif } // VCPORT_E } } return(RCOK);}/////////////////////////////////////////////////////////////////////////// setBreakHandler - set amdb break handler//// Description://// Preconditions:// Postconditions:// Notes://// Return Values:// RCOK/////////////////////////////////////////////////////////////////////////voidgist::setBreakHandler( amdb_breakpoints::BreakHandler handler) { _breakHandler = handler;}/////////////////////////////////////////////////////////////////////////// gist::computeTreeMap - driver for _computeTreeMap//// Description://// Return Values:// RCOK/////////////////////////////////////////////////////////////////////////rc_tgist::computeTreeMap( amdb_treemap* tmap) // out{ assert(tmap != NULL); return (_computeTreeMap(tmap, rootNo));}/////////////////////////////////////////////////////////////////////////// computeTreeMap - traverse tree recursively to compute stats//// Description:// - computes item map and/or node stats (not computed if // corresponding parameter is NULL)//// Return Values:// RCOK/////////////////////////////////////////////////////////////////////////rc_tgist::_computeTreeMap( amdb_treemap* tmap, // out shpid_t pgno) // in: root of subtree{ gist_p page; W_DO(_fix_page(page, pgno, LATCH_SH)); int recCnt = page.nrecs(); unsigned int min = MAXINT, max = 0; float avg = 0; for (int i = 0; i < recCnt; i++) { const keyrec_t& tup = page.rec(i); // compute min, max and avg of key lengths min = (min <= tup.klen() ? min : tup.klen()); max = (max >= tup.klen() ? max : tup.klen()); avg += (float) tup.klen(); if (!page.is_leaf()) { _computeTreeMap(tmap, tup.child()); } else if (tmap != NULL) { // record every item in map; we cannot use // gist_p::rec_size(idx) to compute the size, because this // ignores alignment requirements and the slot index tmap->addTreeItem(pgno, i, gist_p::rec_size(tup.klen(), tup.elen())); } } // record page (also for non-leaf pages, might be necessary in // amdb_wkldstats::_computeQueryStats()) tmap->addPage(pgno, page.nrecs(), page.level(), min, avg / (float) recCnt, max, 1.0 - ((float) page.usable_space()/(float) gist_p::data_sz)); _unfix_page(page); return RCOK;}///////////////////////////////////////////////////////////////////////////////// gist::getPredInfo -// get display-relevant information about predicates in subtree//// Description:// - returns info about all predicates, including BPs// - initialize 'color' to 0//// Return Values:// RCOK// eARRAYSIZE: 'predInfo' is too small///////////////////////////////////////////////////////////////////////////////rc_tgist::getPredInfo( shpid_t subtreeRoot, // in int levels, // in: # of levels to look at DisplayPredInfo predInfo[], // out int& numPredInfo) // in/out{ int next = 0; // start filling predInfo at 0 // we pass off ROOTPAGE as this page's parent, because we don't know the // real parent; for the purposes of tree vis, this inaccuracy doesn't // matter rc_t status = _getPredInfo(subtreeRoot, ROOTPAGE, levels, predInfo, next, numPredInfo); if (status != RCOK) { return(status); } numPredInfo = next; // _getPredInfo() maintains next return(RCOK);}///////////////////////////////////////////////////////////////////////////////// gist::_getPredInfo - recursive traversal of subtree// get display-relevant information about predicates in subtree//// Description://// Return Values:// RCOK// eARRAYSIZE: 'predInfo' is too small///////////////////////////////////////////////////////////////////////////////rc_tgist::_getPredInfo( shpid_t subtreeRoot, // in shpid_t parent, // in int levels, // in: # of levels still to look at DisplayPredInfo predInfo[], // out int& next, // in: next element in predInfo to set int numPredInfo) // in{ gist_p page; W_DO(_fix_page(page, subtreeRoot, LATCH_SH)); int recCnt = page.nrecs(); // record pred info in predInfo
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -