📄 kd_split.cpp
字号:
// fair_split - fair-split splitting rule//// This is a compromise between the kd-tree splitting rule (which// always splits data points at their median) and the midpoint// splitting rule (which always splits a box through its center.// The goal of this procedure is to achieve both nicely balanced// splits, and boxes of bounded aspect ratio.//// A constant FS_ASPECT_RATIO is defined. Given a box, those sides// which can be split so that the ratio of the longest to shortest// side does not exceed ASPECT_RATIO are identified. Among these// sides, we select the one in which the points have the largest// spread. We then split the points in a manner which most evenly// distributes the points on either side of the splitting plane,// subject to maintaining the bound on the ratio of long to short// sides. To determine that the aspect ratio will be preserved,// we determine the longest side (other than this side), and// determine how narrowly we can cut this side, without causing the// aspect ratio bound to be exceeded (small_piece).//// This procedure is more robust than either kd_split or midpt_split,// but is more complicated as well. When point distribution is// extremely skewed, this degenerates to midpt_split (actually// 1/3 point split), and when the points are most evenly distributed,// this degenerates to kd-split.//----------------------------------------------------------------------void fair_split( ANNpointArray pa, // point array ANNidxArray pidx, // point indices (permuted on return) const ANNorthRect &bnds, // bounding rectangle for cell int n, // number of points int dim, // dimension of space int &cut_dim, // cutting dimension (returned) ANNcoord &cut_val, // cutting value (returned) int &n_lo) // num of points on low side (returned){ int d; ANNcoord max_length = bnds.hi[0] - bnds.lo[0]; cut_dim = 0; for (d = 1; d < dim; d++) { // find length of longest box side ANNcoord length = bnds.hi[d] - bnds.lo[d]; if (length > max_length) { max_length = length; cut_dim = d; } } ANNcoord max_spread = 0; // find legal cut with max spread cut_dim = 0; for (d = 0; d < dim; d++) { ANNcoord length = bnds.hi[d] - bnds.lo[d]; // is this side midpoint splitable // without violating aspect ratio? if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) { // compute spread along this dim ANNcoord spr = annSpread(pa, pidx, n, d); if (spr > max_spread) { // best spread so far max_spread = spr; cut_dim = d; // this is dimension to cut } } } max_length = 0; // find longest side other than cut_dim for (d = 0; d < dim; d++) { ANNcoord length = bnds.hi[d] - bnds.lo[d]; if (d != cut_dim && length > max_length) max_length = length; } // consider most extreme splits ANNcoord small_piece = max_length / FS_ASPECT_RATIO; ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut int br1, br2; // is median below lo_cut ? if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) { cut_val = lo_cut; // cut at lo_cut annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); n_lo = br1; } // is median above hi_cut? else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) { cut_val = hi_cut; // cut at hi_cut annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); n_lo = br2; } else { // median cut preserves asp ratio n_lo = n/2; // split about median annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo); }}//----------------------------------------------------------------------// sl_fair_split - sliding fair split splitting rule//// Sliding fair split is a splitting rule that combines the// strengths of both fair split with sliding midpoint split.// Fair split tends to produce balanced splits when the points// are roughly uniformly distributed, but it can produce many// trivial splits when points are highly clustered. Sliding// midpoint never produces trivial splits, and shrinks boxes// nicely if points are highly clustered, but it may produce// rather unbalanced splits when points are unclustered but not// quite uniform.//// Sliding fair split is based on the theory that there are two// types of splits that are "good": balanced splits that produce// fat boxes, and unbalanced splits provided the cell with fewer// points is fat.//// This splitting rule operates by first computing the longest// side of the current bounding box. Then it asks which sides// could be split (at the midpoint) and still satisfy the aspect// ratio bound with respect to this side. Among these, it selects// the side with the largest spread (as fair split would). It// then considers the most extreme cuts that would be allowed by// the aspect ratio bound. This is done by dividing the longest// side of the box by the aspect ratio bound. If the median cut// lies between these extreme cuts, then we use the median cut.// If not, then consider the extreme cut that is closer to the// median. If all the points lie to one side of this cut, then// we slide the cut until it hits the first point. This may// violate the aspect ratio bound, but will never generate empty// cells. However the sibling of every such skinny cell is fat,// and hence packing arguments still apply.////----------------------------------------------------------------------void sl_fair_split( ANNpointArray pa, // point array ANNidxArray pidx, // point indices (permuted on return) const ANNorthRect &bnds, // bounding rectangle for cell int n, // number of points int dim, // dimension of space int &cut_dim, // cutting dimension (returned) ANNcoord &cut_val, // cutting value (returned) int &n_lo) // num of points on low side (returned){ int d; ANNcoord min, max; // min/max coordinates int br1, br2; // split break points ANNcoord max_length = bnds.hi[0] - bnds.lo[0]; cut_dim = 0; for (d = 1; d < dim; d++) { // find length of longest box side ANNcoord length = bnds.hi[d] - bnds.lo[d]; if (length > max_length) { max_length = length; cut_dim = d; } } ANNcoord max_spread = 0; // find legal cut with max spread cut_dim = 0; for (d = 0; d < dim; d++) { ANNcoord length = bnds.hi[d] - bnds.lo[d]; // is this side midpoint splitable // without violating aspect ratio? if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) { // compute spread along this dim ANNcoord spr = annSpread(pa, pidx, n, d); if (spr > max_spread) { // best spread so far max_spread = spr; cut_dim = d; // this is dimension to cut } } } max_length = 0; // find longest side other than cut_dim for (d = 0; d < dim; d++) { ANNcoord length = bnds.hi[d] - bnds.lo[d]; if (d != cut_dim && length > max_length) max_length = length; } // consider most extreme splits ANNcoord small_piece = max_length / FS_ASPECT_RATIO; ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut // find min and max along cut_dim annMinMax(pa, pidx, n, cut_dim, min, max); // is median below lo_cut? if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) { if (max > lo_cut) { // are any points above lo_cut? cut_val = lo_cut; // cut at lo_cut annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); n_lo = br1; // balance if there are ties } else { // all points below lo_cut cut_val = max; // cut at max value annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); n_lo = n-1; } } // is median above hi_cut? else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) { if (min < hi_cut) { // are any points below hi_cut? cut_val = hi_cut; // cut at hi_cut annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); n_lo = br2; // balance if there are ties } else { // all points above hi_cut cut_val = min; // cut at min value annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); n_lo = 1; } } else { // median cut is good enough n_lo = n/2; // split about median annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -