📄 gist_btree.cc

📁 Libgist is an implementation of the Generalized Search Tree, a template index structure that makes i
💻 CC
📖 第 1 页 / 共 2 页
字号:
上一页 12
///////////////////////////////////////////////////////////////////////////////// gist_btree::pickSplit - choose split point that balances size of nodes//// Description:// 	- Chooses a split point that roughly balances the size of the two new//	  nodes. Also takes sizes of new entries into account.//// Preconditions:// Postconditions:// Notes://// Return Values://      RCOK///////////////////////////////////////////////////////////////////////////////rc_t bt_ext_t::pickSplit(    gist_p& page,    int rightEntries[],    int& numRight,    const vec_t& oldBp,    vec_t& leftBp,    vec_t& rightBp,    const vec_t& entry1,    bool& oneGoesRight,    const vec_t& entry2,    bool& twoGoesRight){    // first, count the number of bytes used for all keys (also the new ones)    int totalBytes = 0;    int slotCnt = page.nrecs();    int i;    for (i = 0; i < slotCnt; i++) {	// for internal entries, we include the data pointers, because	// these will also be distributed (and who knows, they might	// even be varying length)	totalBytes += page.rec(i).klen();    }    totalBytes += entry1.len(0);    if (entry1.size() != 0) slotCnt++;    totalBytes += entry2.len(0);    if (entry2.size() != 0) slotCnt++;    PosInfo entries[gist_p::max_scnt + 2];    keyrec_t::hdr_s hdr1, hdr2;    _loadPosInfo(page, entry1, entry2, hdr1, hdr2, entries);    // now, accumulate entries until you exceed half the total size    // (entries[] contains sizes for all entries in key/data order, including new entries)    int total = 0;    i = 0;    while (total < totalBytes/2 && i < slotCnt) {        total += entries[i].hdr->klen();	i++;    }    assert(i != slotCnt); // can't be...    // everything from slot i on goes right ...    numRight = 0;    oneGoesRight = false;    twoGoesRight = false;    for (int j = i; j < slotCnt; j++) {	if (entries[j].slot > 0) {	    rightEntries[numRight] = entries[j].slot;	    numRight++;	} else if (entries[j].slot == -1) {	    oneGoesRight = true;	} else {	    assert(entries[j].slot == -2);	    twoGoesRight = true;	}    }    // the BP of the original node stays the same    if (oldBp.size() != 0) {	(void) memcpy(leftBp.ptr(0), oldBp.ptr(0), oldBp.len(0));	const void *leftptr = leftBp.ptr(0);	leftBp.set(leftptr, oldBp.len(0));    } else {        // this is what used to be the root;	// the BP becomes -infinity	void *leftptr = leftBp.ptr(0);	negInftyKey(leftptr);	int leftsz = this->keySize(leftptr);	void *dataptr = (void *) ((char *) leftptr + leftsz);	negInftyData(dataptr);	leftsz += this->dataSize(dataptr);	leftBp.set(leftptr, leftsz);    }    // the BP of the new right sibling is the item at the split point    void* rightptr = rightBp.ptr(0);    int rightlen;    if (entries[i].slot > 0) {        // take then BP from the page	const keyrec_t& tup = page.rec(entries[i].slot);	(void) memcpy(rightptr, tup.key(), tup.klen());	rightlen = tup.klen();	if (page.is_leaf()) {	    // also copy the data ptr	    (void) memcpy((void *) ((char *) rightptr + rightlen), tup.elem(), tup.elen());	    rightlen += tup.elen();	}    } else {        const vec_t* e;	if (entries[i].slot == -1) {	    e = &entry1;	} else {	    assert(entries[i].slot == -2);	    e = &entry2;	}	(void) memcpy(rightptr, e->ptr(0), e->len(0));	rightlen = e->len(0);	if (page.is_leaf()) {	    (void) memcpy((void *) ((char *) rightptr + (int) rightlen), e->ptr(1), e->len(1));	    rightlen += e->len(1);	}    }    rightBp.set(rightptr, rightlen);    return RCOK;}/////////////////////////////////////////////////////////////////////////// bt_ext_t::unionBp - generate BP//// Description://	- B-trees partition the data space, which means that BPs do not//	  change when data is added to or delete from a page//	- unfortunately, we can't generally provide the correct BP // 	  for bulk-loading (don't know when to return -\infty)///////////////////////////////////////////////////////////////////////////voidbt_ext_t::unionBp(    const gist_p& page, // in    vec_t& bp, // in/out    bool bpIsValid, // in    const vec_t& pred1, // in    const vec_t& pred2, // in    bool& bpChanged) // out{    bpChanged = false;}gist_cursorext_t*bt_ext_t::queryCursor(    const gist_query_t* query) const{    return gist_cursorext_t::gist_cursorext_list[gist_cursorext_t::cext_stack_id];}bool bt_ext_t::check(    const vec_t& bp,    const vec_t& pred,    int level){    if (keyCmp(pred.ptr(0), bp.ptr(0)) < 0) return false;    if (level > 1) {        // check data contained in predicate	void* bpData = (char *) bp.ptr(0) + keySize(bp.ptr(0));	void* predData = (char *) pred.ptr(0) + keySize(pred.ptr(0));	return (dataCmp(predData, bpData) >= 0);    }    return true;}intbt_ext_t::_binSearch(    const gist_p& page,    const void* key,    const void* data,    bool keyOnly) // true: only compare keys{    int hi = page.nrecs() - 1;    if (hi == -1) {        // nothing on this page yet	return -1;    }    int lo = 0;    int mid;    const void* midkey;    const void* middata;    int res;    for (;;) {	mid = (hi + lo) / 2;        const keyrec_t& tup = page.rec(mid);	midkey = tup.key();	if (page.is_leaf()) {	    middata = tup.elem();	} else {	    int sz = this->keySize(midkey);	    middata = (const void *) (((char *) midkey) + sz);	}	res = keyCmp(key, midkey);	if (!keyOnly && res == 0) {	    res = dataCmp(data, middata);	}	if (res < 0) {	    // key is smaller than midpoint	    hi = mid - 1;	} else if (res > 0) {	    lo = mid + 1;	} else {	    // found an exact match, but not sure it's the first one	    hi = mid; // not mid - 1, we might end up returning mid	    if (hi == lo) return hi;	}	if (hi < lo) return hi;    }#if 0 // just for explanatory purposes    if (res < 0) {        return mid-1;	// because mid-1 is our upper bound, but also our lower bound	// (hi <= lo)     } else {        // res > 0: lo = hi, because mid < hi and lo now = mid + 1        return hi;    }#endif}// Determine where the two new entries would go on the page (which slots they// would occupy). Returns this info through array of PosInfos, two of which will // contain info for new entriesvoidbt_ext_t::_loadPosInfo(    gist_p& page,    const vec_t& entry1,    const vec_t& entry2,    keyrec_t::hdr_s& hdr1, // in: hdr_s for 1st entry, needed for entries[]    keyrec_t::hdr_s& hdr2, // in: hdr_s for 2nd entry, needed for entries[]    PosInfo entries[]) // out: hdrs of all items of page + new entries, sorted in key/data order{    int cnt = page.nrecs();    int numEntries = cnt;    int k;    for (k = 0; k < cnt; k++) {        entries[k].hdr = &page.rec(k);	entries[k].slot = k;    }        if (entry1.size() == 0) {	// no entries to add to PosInfo, we're done        return;    }    // Figure out where entry1/-2 would go.    const void *data1, *data2;    if (page.is_leaf()) {	data1 = entry1.ptr(1);	data2 = entry2.ptr(1);    } else {	// extract data portion from "key"	data1 = (((char *) entry1.ptr(0)) + this->keySize(entry1.ptr(0)));	data2 = (((char *) entry2.ptr(0)) + this->keySize(entry2.ptr(0)));    }    int oneSlot = _binSearch(page, entry1.ptr(0), data1, false) + 1;    int twoSlot = -1;    const vec_t* firstEntry = NULL; // new entry with "lower" slot index    const vec_t* secondEntry = NULL; // new entry with "higher" slot index    if (entry2.size() != 0) {	twoSlot = _binSearch(page, entry2.ptr(0), data2, false) + 1;	if (oneSlot == twoSlot) {	    // we have to determine which one of the entries goes first	    int res = keyCmp(entry1.ptr(0), entry2.ptr(0));	    if (res == 0) {	        res = dataCmp(data1, data2);	    }	    if (res < 0) {	        firstEntry = &entry1;		secondEntry = &entry2;	    } else if (res > 0) {	        firstEntry = &entry2;		secondEntry = &entry1;	    } else {	        // res == 0: something's wrong (we've got perfect duplicates)		assert(0);	    }	} else if (oneSlot < twoSlot) {	    firstEntry = &entry1;	    secondEntry = &entry2;	} else { // oneSlot > twoSlot	    firstEntry = &entry2;	    secondEntry = &entry2;	}    } else {        // we only have entry1	secondEntry = &entry1;    }    int firstSlot = (oneSlot > twoSlot ? twoSlot : oneSlot);    int secondSlot = (oneSlot > twoSlot ? oneSlot : twoSlot);    bool oneGoesFirst = (firstEntry == &entry1);    // insert one entry    hdr1.klen = secondEntry->len(0);    numEntries++;    for (k = numEntries-1; k > secondSlot; k--) {        entries[k] = entries[k-1];    }    entries[secondSlot].hdr = (keyrec_t *) &hdr1;    entries[secondSlot].slot = (oneGoesFirst ? -2 : -1);    // insert other entry    if (entry2.size() != 0) {	hdr2.klen = firstEntry->len(0);	numEntries++;	for (k = numEntries-1; k > firstSlot; k--) {	    entries[k] = entries[k-1];	}	entries[firstSlot].hdr = (keyrec_t *) &hdr2;	entries[firstSlot].slot = (oneGoesFirst ? -1 : -2);    }}static intint_size(const void *i){    return sizeof(int);}static voidint_negInfty(void *i){    // can't use assignment, i might not be aligned properly    int min = MININT;    (void) memcpy(i, (void *) &min, sizeof(min));}bt_ext_t bt_int_ext(gist_ext_t::bt_int_ext_id, "bt_int_ext",    gist_support::printIntBtPred, gist_support::printInt,    &gist_support::parseInt, &gist_support::parseInt,    gist_support::parseIntQuery, int_cmp, int_cmp,    int_size, int_size, int_negInfty, int_negInfty);static intstr_size(const void *s){    return strlen((char *) s) + 1;}static voidstr_negInfty(void *s){    *((char *) s) = '\0';}bt_ext_t bt_str_ext(gist_ext_t::bt_str_ext_id, "bt_str_ext",    gist_support::printStringBtPred, gist_support::printInt,    &gist_support::parseString, &gist_support::parseInt,    gist_support::parseStringQuery, str_cmp, int_cmp,    str_size, int_size, str_negInfty, int_negInfty);
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -