📄 ann_test.cpp
字号:
//-------------------------------------------------------------------- if (stats > SILENT) { if (type == DATA) { cout << "[Read Data Points:\n"; cout << " data_size = " << n << "\n"; } else { cout << "[Read Query Points:\n"; cout << " query_size = " << n << "\n"; } cout << " file_name = " << file_nm << "\n"; cout << " dim = " << dim << "\n"; // print if results requested if ((type == DATA && stats >= SHOW_PTS) || (type == QUERY && stats >= QUERY_RES)) { cout << " (Points:\n"; for (i = 0; i < n; i++) { cout << " " << i << "\t"; printPoint(pa[i], dim); cout << "\n"; } cout << " )\n"; } cout << "]\n"; }}//------------------------------------------------------------------------// getTrueNN// Computes the true nearest neighbors. For purposes of validation,// this intentionally done in a rather dumb (but safe way), by// invoking the brute-force search.//// The number of true nearest neighbors is somewhat larger than// the number of nearest neighbors. This is so that the validation// can determine the expected difference in element ranks.//// This procedure is invoked just prior to running queries. Since// the operation takes a long time, it is performed only if needed.// In particular, once generated, it will be regenerated only if// new query or data points are generated, or if the requested number// of true near neighbors or approximate near neighbors has changed.//// To validate fixed-radius searching, we compute two counts, one// with the original query radius (trueSqRadius) and the other with// a radius shrunken by the error factor (minSqradius). We then// check that the count of points inside the approximate range is// between these two bounds. Because fixed-radius search is// allowed to ignore points within the shrunken radius, we only// compute exact neighbors within this smaller distance (for we// cannot guarantee that we will even visit the other points).//------------------------------------------------------------------------void getTrueNN() // compute true nearest neighbors{ if (stats > SILENT) { cout << "(Computing true nearest neighbors for validation. This may take time.)\n"; } // deallocate existing storage if (true_nn_idx != NULL) delete [] true_nn_idx; if (true_dists != NULL) delete [] true_dists; if (min_pts_in_range != NULL) delete [] min_pts_in_range; if (max_pts_in_range != NULL) delete [] max_pts_in_range; if (true_nn > data_size) { // can't get more nn than points true_nn = data_size; } // allocate true answer storage true_nn_idx = new ANNidx[true_nn*query_size]; true_dists = new ANNdist[true_nn*query_size]; min_pts_in_range = new int[query_size]; max_pts_in_range = new int[query_size]; ANNidxArray curr_nn_idx = true_nn_idx; // current locations in arrays ANNdistArray curr_dists = true_dists; // allocate search structure ANNbruteForce *the_brute = new ANNbruteForce(data_pts, data_size, dim); // compute nearest neighbors for (int i = 0; i < query_size; i++) { if (radius_bound == 0) { // standard kNN search the_brute->annkSearch( // compute true near neighbors query_pts[i], // query point true_nn, // number of nearest neighbors curr_nn_idx, // where to put indices curr_dists); // where to put distances } else { // fixed radius kNN search // search radii limits ANNdist trueSqRadius = ANN_POW(radius_bound); ANNdist minSqRadius = ANN_POW(radius_bound / (1+epsilon)); min_pts_in_range[i] = the_brute->annkFRSearch( query_pts[i], // query point minSqRadius, // shrunken search radius true_nn, // number of near neighbors curr_nn_idx, // nearest neighbors (returned) curr_dists); // distance (returned) max_pts_in_range[i] = the_brute->annkFRSearch( query_pts[i], // query point trueSqRadius, // true search radius 0, NULL, NULL); // (ignore kNN info) } curr_nn_idx += true_nn; // increment nn index pointer curr_dists += true_nn; // increment nn dist pointer } delete the_brute; // delete brute-force struct valid_dirty = ANNfalse; // validation good for now}//------------------------------------------------------------------------// doValidation// Compares the approximate answers to the k-nearest neighbors// against the true nearest neighbors (computed earlier). It is// assumed that the true nearest neighbors and indices have been// computed earlier.//// First, we check that all the results are within their allowed// limits, and generate an internal error, if not. For the sake of// performance evaluation, we also compute the following two// quantities for nearest neighbors://// Average Error// -------------// The relative error between the distance to a reported nearest// neighbor and the true nearest neighbor (of the same rank),//// Rank Error// ----------// The difference in rank between the reported nearest neighbor and// its position (if any) among the true nearest neighbors. If we// cannot find this point among the true nearest neighbors, then// it assumed that the rank of the true nearest neighbor is true_nn+1.//// Because of the possibility of duplicate distances, this is computed// as follows. For the j-th reported nearest neighbor, we count the// number of true nearest neighbors that are at least this close. Let// this be rnk. Then the rank error is max(0, j-rnk). (In the code// below, j is an array index and so the first item is 0, not 1. Thus// we take max(0, j+1-rnk) instead.)//// For the results of fixed-radious range count, we verify that the// reported number of points in the range lies between the actual// number of points in the shrunken and the true search radius.//------------------------------------------------------------------------void doValidation() // perform validation{ int* curr_apx_idx = apx_nn_idx; // approx index pointer ANNdistArray curr_apx_dst = apx_dists; // approx distance pointer int* curr_tru_idx = true_nn_idx; // true index pointer ANNdistArray curr_tru_dst = true_dists; // true distance pointer int i, j; if (true_nn < near_neigh) { Error("Cannot validate with fewer true near neighbors than actual", ANNabort); } for (i = 0; i < query_size; i++) { // validate each query //---------------------------------------------------------------- // Compute result errors // In fixed radius search it is possible that not all k // nearest neighbors were computed. Because the true // results are computed over the shrunken radius, we should // have at least as many true nearest neighbors as // approximate nearest neighbors. (If not, an infinite // error will be generated, and so an internal error will // will be generated. // // Because nearest neighbors are sorted in increasing order // of distance, as soon as we see a null index, we can // terminate the distance checking. The error in the // result should not exceed epsilon. However, if // max_pts_visit is nonzero (meaning that the search is // terminated early) this might happen. //---------------------------------------------------------------- for (j = 0; j < near_neigh; j++) { if (curr_tru_idx[j] == ANN_NULL_IDX)// no more true neighbors? break; // true i-th smallest distance double true_dist = ANN_ROOT(curr_tru_dst[j]); // reported i-th smallest double rept_dist = ANN_ROOT(curr_apx_dst[j]); // better than optimum? if (rept_dist < true_dist*(1-ERR)) { Error("INTERNAL ERROR: True nearest neighbor incorrect", ANNabort); } double resultErr; // result error if (true_dist == 0.0) { // let's not divide by zero if (rept_dist != 0.0) resultErr = ANN_DBL_MAX; else resultErr = 0.0; } else { resultErr = (rept_dist - true_dist) / ((double) true_dist); } if (resultErr > epsilon && max_pts_visit == 0) { Error("INTERNAL ERROR: Actual error exceeds epsilon", ANNabort); } #ifdef ANN_PERF ann_average_err += resultErr; // update statistics error #endif } //-------------------------------------------------------------------- // Compute rank errors (only needed for perf measurements) //-------------------------------------------------------------------- #ifdef ANN_PERF for (j = 0; j < near_neigh; j++) { if (curr_tru_idx[i] == ANN_NULL_IDX) // no more true neighbors? break; double rnkErr = 0.0; // rank error // reported j-th distance ANNdist rept_dist = curr_apx_dst[j]; int rnk = 0; // compute rank of this item while (rnk < true_nn && curr_tru_dst[rnk] <= rept_dist) rnk++; if (j+1-rnk > 0) rnkErr = (double) (j+1-rnk); ann_rank_err += rnkErr; // update average rank error } #endif //---------------------------------------------------------------- // Check range counts from fixed-radius query //---------------------------------------------------------------- if (radius_bound != 0) { // fixed-radius search if (apx_pts_in_range[i] < min_pts_in_range[i] || apx_pts_in_range[i] > max_pts_in_range[i]) Error("INTERNAL ERROR: Invalid fixed-radius range count", ANNabort); } curr_apx_idx += near_neigh; curr_apx_dst += near_neigh; curr_tru_idx += true_nn; // increment current pointers curr_tru_dst += true_nn; }}//----------------------------------------------------------------------// treeStats// Computes a number of statistics related to kd_trees and// bd_trees. These statistics are printed if in verbose mode,// and otherwise they are only printed if they are deemed to// be outside of reasonable operating bounds.//----------------------------------------------------------------------#define log2(x) (log(x)/log(2.0)) // log base 2void treeStats( ostream &out, // output stream ANNbool verbose) // print stats{ const int MIN_PTS = 20; // min no. pts for checking const float MAX_FRAC_TL = 0.50; // max frac of triv leaves const float MAX_AVG_AR = 20; // max average aspect ratio ANNkdStats st; // statistics structure the_tree->getStats(st); // get statistics // total number of nodes int n_nodes = st.n_lf + st.n_spl + st.n_shr; // should be O(n/bs) int opt_n_nodes = (int) (2*(float(st.n_pts)/st.bkt_size)); int too_many_nodes = 10*opt_n_nodes; if (st.n_pts >= MIN_PTS && n_nodes > too_many_nodes) { out << "-----------------------------------------------------------\n"; out << "Warning: The tree has more than 10x as many nodes as points.\n"; out << "You may want to consider a different split or shrink method.\n"; out << "-----------------------------------------------------------\n"; verbose = ANNtrue; } // fraction of trivial leaves float frac_tl = (st.n_lf == 0 ? 0 : ((float) st.n_tl)/ st.n_lf); if (st.n_pts >= MIN_PTS && frac_tl > MAX_FRAC_TL) { out << "-----------------------------------------------------------\n"; out << "Warning: A significant fraction of leaves contain no points.\n"; out << "You may want to consider a different split or shrink method.\n"; out << "-----------------------------------------------------------\n"; verbose = ANNtrue; } // depth should be O(dim*log n) int too_many_levels = (int) (2.0 * st.dim * log2((double) st.n_pts)); int opt_levels = (int) log2(double(st.n_pts)/st.bkt_size); if (st.n_pts >= MIN_PTS && st.depth > too_many_levels) { out << "-----------------------------------------------------------\n"; out << "Warning: The tree is more than 2x as deep as (dim*log n).\n"; out << "You may want to consider a different split or shrink method.\n"; out << "-----------------------------------------------------------\n"; verbose = ANNtrue; } // average leaf aspect ratio if (st.n_pts >= MIN_PTS && st.avg_ar > MAX_AVG_AR) { out << "-----------------------------------------------------------\n"; out << "Warning: Average aspect ratio of cells is quite large.\n"; out << "This may slow queries depending on the point distribution.\n"; out << "-----------------------------------------------------------\n"; verbose = ANNtrue; } //------------------------------------------------------------------ // Print summaries if requested //------------------------------------------------------------------ if (verbose) { // output statistics out << " (Structure Statistics:\n"; out << " n_nodes = " << n_nodes << " (opt = " << opt_n_nodes << ", best if < " << too_many_nodes << ")\n" << " n_leaves = " << st.n_lf << " (" << st.n_tl << " contain no points)\n" << " n_splits = " << st.n_spl << "\n" << " n_shrinks = " << st.n_shr << "\n"; out << " empty_leaves = " << frac_tl*100 << " percent (best if < " << MAX_FRAC_TL*100 << " percent)\n"; out << " depth = " << st.depth << " (opt = " << opt_levels << ", best if < " << too_many_levels << ")\n"; out << " avg_aspect_ratio = " << st.avg_ar << " (best if < " << MAX_AVG_AR << ")\n"; out << " )\n"; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -