rtree.c
来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 1,365 行 · 第 1/3 页
C
1,365 行
OffsetNumber *spl_left, *spl_right; TupleDesc tupDesc; int n; OffsetNumber newitemoff; p = (Page) BufferGetPage(buffer); opaque = (RTreePageOpaque) PageGetSpecialPointer(p); rtpicksplit(r, p, &v, itup, rtstate); /* * The root of the tree is the first block in the relation. If we're * about to split the root, we need to do some hocus-pocus to enforce * this guarantee. */ if (BufferGetBlockNumber(buffer) == P_ROOT) { leftbuf = ReadBuffer(r, P_NEW); RTInitBuffer(leftbuf, opaque->flags); lbknum = BufferGetBlockNumber(leftbuf); left = (Page) BufferGetPage(leftbuf); } else { leftbuf = buffer; IncrBufferRefCount(buffer); lbknum = BufferGetBlockNumber(buffer); left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData)); } rightbuf = ReadBuffer(r, P_NEW); RTInitBuffer(rightbuf, opaque->flags); rbknum = BufferGetBlockNumber(rightbuf); right = (Page) BufferGetPage(rightbuf); spl_left = v.spl_left; spl_right = v.spl_right; leftoff = rightoff = FirstOffsetNumber; maxoff = PageGetMaxOffsetNumber(p); newitemoff = OffsetNumberNext(maxoff); /* build an InsertIndexResult for this insertion */ res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); /* * spl_left contains a list of the offset numbers of the tuples that * will go to the left page. For each offset number, get the tuple * item, then add the item to the left page. Similarly for the right * side. */ /* fill left node */ for (n = 0; n < v.spl_nleft; n++) { i = *spl_left; if (i == newitemoff) item = itup; else { itemid = PageGetItemId(p, i); item = (IndexTuple) PageGetItem(p, itemid); } if (PageAddItem(left, (Item) item, IndexTupleSize(item), leftoff, LP_USED) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(r)); leftoff = OffsetNumberNext(leftoff); if (i == newitemoff) ItemPointerSet(&(res->pointerData), lbknum, leftoff); spl_left++; /* advance in left split vector */ } /* fill right node */ for (n = 0; n < v.spl_nright; n++) { i = *spl_right; if (i == newitemoff) item = itup; else { itemid = PageGetItemId(p, i); item = (IndexTuple) PageGetItem(p, itemid); } if (PageAddItem(right, (Item) item, IndexTupleSize(item), rightoff, LP_USED) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(r)); rightoff = OffsetNumberNext(rightoff); if (i == newitemoff) ItemPointerSet(&(res->pointerData), rbknum, rightoff); spl_right++; /* advance in right split vector */ } /* Make sure we consumed all of the split vectors, and release 'em */ Assert(*spl_left == InvalidOffsetNumber); Assert(*spl_right == InvalidOffsetNumber); pfree(v.spl_left); pfree(v.spl_right); if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT) PageRestoreTempPage(left, p); WriteBuffer(leftbuf); WriteBuffer(rightbuf); /* * Okay, the page is split. We have three things left to do: * * 1) Adjust any active scans on this index to cope with changes we * introduced in its structure by splitting this page. * * 2) "Tighten" the bounding box of the pointer to the left page in the * parent node in the tree, if any. Since we moved a bunch of stuff * off the left page, we expect it to get smaller. This happens in * the internal insertion routine. * * 3) Insert a pointer to the right page in the parent. This may cause * the parent to split. If it does, we need to repeat steps one and * two for each split node in the tree. */ /* adjust active scans */ rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber); tupDesc = r->rd_att; isnull = (char *) palloc(r->rd_rel->relnatts); for (blank = 0; blank < r->rd_rel->relnatts; blank++) isnull[blank] = ' '; ltup = (IndexTuple) index_formtuple(tupDesc, &(v.spl_ldatum), isnull); rtup = (IndexTuple) index_formtuple(tupDesc, &(v.spl_rdatum), isnull); pfree(isnull); /* set pointers to new child pages in the internal index tuples */ ItemPointerSet(&(ltup->t_tid), lbknum, 1); ItemPointerSet(&(rtup->t_tid), rbknum, 1); rtintinsert(r, stack, ltup, rtup, rtstate); pfree(ltup); pfree(rtup); return res;}static voidrtintinsert(Relation r, RTSTACK *stk, IndexTuple ltup, IndexTuple rtup, RTSTATE *rtstate){ IndexTuple old; Buffer b; Page p; Datum ldatum, rdatum, newdatum; InsertIndexResult res; if (stk == (RTSTACK *) NULL) { rtnewroot(r, ltup, rtup); return; } b = ReadBuffer(r, stk->rts_blk); p = BufferGetPage(b); old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child)); /* * This is a hack. Right now, we force rtree internal keys to be * constant size. To fix this, need delete the old key and add both * left and right for the two new pages. The insertion of left may * force a split if the new left key is bigger than the old key. */ if (IndexTupleSize(old) != IndexTupleSize(ltup)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("variable-length rtree keys are not supported"))); /* install pointer to left child */ memmove(old, ltup, IndexTupleSize(ltup)); if (nospace(p, rtup)) { newdatum = IndexTupleGetDatum(ltup); rttighten(r, stk->rts_parent, newdatum, IndexTupleAttSize(ltup), rtstate); res = rtdosplit(r, b, stk->rts_parent, rtup, rtstate); WriteBuffer(b); /* don't forget to release buffer! - * 01/31/94 */ pfree(res); } else { if (PageAddItem(p, (Item) rtup, IndexTupleSize(rtup), PageGetMaxOffsetNumber(p), LP_USED) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(r)); WriteBuffer(b); ldatum = IndexTupleGetDatum(ltup); rdatum = IndexTupleGetDatum(rtup); newdatum = FunctionCall2(&rtstate->unionFn, ldatum, rdatum); rttighten(r, stk->rts_parent, newdatum, IndexTupleAttSize(rtup), rtstate); pfree(DatumGetPointer(newdatum)); }}static voidrtnewroot(Relation r, IndexTuple lt, IndexTuple rt){ Buffer b; Page p; b = ReadBuffer(r, P_ROOT); RTInitBuffer(b, 0); p = BufferGetPage(b); if (PageAddItem(p, (Item) lt, IndexTupleSize(lt), FirstOffsetNumber, LP_USED) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(r)); if (PageAddItem(p, (Item) rt, IndexTupleSize(rt), OffsetNumberNext(FirstOffsetNumber), LP_USED) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(r)); WriteBuffer(b);}/* * Choose how to split an rtree page into two pages. * * We return two vectors of index item numbers, one for the items to be * put on the left page, one for the items to be put on the right page. * In addition, the item to be added (itup) is listed in the appropriate * vector. It is represented by item number N+1 (N = # of items on page). * * Both vectors have a terminating sentinel value of InvalidOffsetNumber, * but the sentinal value is no longer used, because the SPLITVEC * vector also contains the length of each vector, and that information * is now used to iterate over them in rtdosplit(). --kbb, 21 Sept 2001 * * The bounding-box datums for the two new pages are also returned in *v. * * This is the quadratic-cost split algorithm Guttman describes in * his paper. The reason we chose it is that you can implement this * with less information about the data types on which you're operating. * * We must also deal with a consideration not found in Guttman's algorithm: * variable-length data. In particular, the incoming item might be * large enough that not just any split will work. In the worst case, * our "split" may have to be the new item on one page and all the existing * items on the other. Short of that, we have to take care that we do not * make a split that leaves both pages too full for the new item. */static voidrtpicksplit(Relation r, Page page, SPLITVEC *v, IndexTuple itup, RTSTATE *rtstate){ OffsetNumber maxoff, newitemoff; OffsetNumber i, j; IndexTuple item_1, item_2; Datum datum_alpha, datum_beta; Datum datum_l, datum_r; Datum union_d, union_dl, union_dr; Datum inter_d; bool firsttime; float size_alpha, size_beta, size_union, size_inter; float size_waste, waste; float size_l, size_r; int nbytes; OffsetNumber seed_1 = 0, seed_2 = 0; OffsetNumber *left, *right; Size newitemsz, item_1_sz, item_2_sz, left_avail_space, right_avail_space; int total_num_tuples, num_tuples_without_seeds, max_after_split; /* in Guttman's lingo, (M - m) */ float diff; /* diff between cost of putting tuple left * or right */ SPLITCOST *cost_vector; int n; /* * First, make sure the new item is not so large that we can't * possibly fit it on a page, even by itself. (It's sufficient to * make this test here, since any oversize tuple must lead to a page * split attempt.) */ newitemsz = IndexTupleTotalSize(itup); if (newitemsz > RTPageAvailSpace) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %lu exceeds rtree maximum, %lu", (unsigned long) newitemsz, (unsigned long) RTPageAvailSpace))); maxoff = PageGetMaxOffsetNumber(page); newitemoff = OffsetNumberNext(maxoff); /* phony index for new * item */ total_num_tuples = newitemoff; num_tuples_without_seeds = total_num_tuples - 2; max_after_split = total_num_tuples / 2; /* works for m = M/2 */ /* Make arrays big enough for worst case, including sentinel */ nbytes = (maxoff + 2) * sizeof(OffsetNumber); v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes); firsttime = true; waste = 0.0; for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); datum_alpha = IndexTupleGetDatum(item_1); item_1_sz = IndexTupleTotalSize(item_1); for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j)); datum_beta = IndexTupleGetDatum(item_2); item_2_sz = IndexTupleTotalSize(item_2); /* * Ignore seed pairs that don't leave room for the new item on * either split page. */ if (newitemsz + item_1_sz > RTPageAvailSpace && newitemsz + item_2_sz > RTPageAvailSpace) continue; /* compute the wasted space by unioning these guys */ union_d = FunctionCall2(&rtstate->unionFn, datum_alpha, datum_beta); FunctionCall2(&rtstate->sizeFn, union_d, PointerGetDatum(&size_union)); inter_d = FunctionCall2(&rtstate->interFn, datum_alpha, datum_beta); /* * The interFn may return a NULL pointer (not an SQL null!) to * indicate no intersection. sizeFn must cope with this. */ FunctionCall2(&rtstate->sizeFn, inter_d, PointerGetDatum(&size_inter)); size_waste = size_union - size_inter; if (DatumGetPointer(union_d) != NULL) pfree(DatumGetPointer(union_d)); if (DatumGetPointer(inter_d) != NULL) pfree(DatumGetPointer(inter_d)); /* * are these a more promising split that what we've already * seen? */ if (size_waste > waste || firsttime) { waste = size_waste; seed_1 = i; seed_2 = j; firsttime = false; } } } if (firsttime) { /* * There is no possible split except to put the new item on its * own page. Since we still have to compute the union rectangles, * we play dumb and run through the split algorithm anyway, * setting seed_1 = first item on page and seed_2 = new item. */ seed_1 = FirstOffsetNumber; seed_2 = newitemoff; } item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1)); datum_alpha = IndexTupleGetDatum(item_1); datum_l = FunctionCall2(&rtstate->unionFn, datum_alpha, datum_alpha); FunctionCall2(&rtstate->sizeFn, datum_l, PointerGetDatum(&size_l)); left_avail_space = RTPageAvailSpace - IndexTupleTotalSize(item_1); if (seed_2 == newitemoff) { item_2 = itup; /* Needn't leave room for new item in calculations below */ newitemsz = 0; } else item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2)); datum_beta = IndexTupleGetDatum(item_2); datum_r = FunctionCall2(&rtstate->unionFn, datum_beta, datum_beta); FunctionCall2(&rtstate->sizeFn, datum_r, PointerGetDatum(&size_r)); right_avail_space = RTPageAvailSpace - IndexTupleTotalSize(item_2); /* * Now split up the regions between the two seeds. * * The cost_vector array will contain hints for determining where each * tuple should go. Each record in the array will contain a boolean, * choose_left, that indicates which node the tuple prefers to be on, * and the absolute difference in cost between putting the tuple in * its favored node and in the other node. * * Later, we will sort the cost_vector in descending order by cost * difference, and consider the tuples in that order for placement. * That way, the tuples that *really* want to be in one node or the * other get to choose first, and the tuples that don't really care * choose last. * * First, build the cost_vector array. The new index tuple will also be * handled in this loop, and represented in the array, with * i==newitemoff. * * In the case of variable size tuples it is possible that we only have * the two seeds and no other tuples, in which case we don't do any of
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?