⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ann_test.cpp

📁 c++实现的KNN库:建立高维度的K-d tree,实现K邻域搜索
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	//--------------------------------------------------------------------	if (stats > SILENT) {		if (type == DATA) {			cout << "[Read Data Points:\n";			cout << "  data_size  = " << n << "\n";		}		else {			cout << "[Read Query Points:\n";			cout << "  query_size = " << n << "\n";		}		cout << "  file_name  = " << file_nm << "\n";		cout << "  dim        = " << dim << "\n";												// print if results requested		if ((type == DATA && stats >= SHOW_PTS) ||			(type == QUERY && stats >= QUERY_RES)) {			cout << "  (Points:\n";			for (i = 0; i < n; i++) {				cout << "    " << i << "\t";				printPoint(pa[i], dim);				cout << "\n";			}			cout << "  )\n";		}		cout << "]\n";	}}//------------------------------------------------------------------------//	getTrueNN//		Computes the true nearest neighbors.  For purposes of validation,//		this intentionally done in a rather dumb (but safe way), by//		invoking the brute-force search.////		The number of true nearest neighbors is somewhat larger than//		the number of nearest neighbors.  This is so that the validation//		can determine the expected difference in element ranks.////		This procedure is invoked just prior to running queries.  Since//		the operation takes a long time, it is performed only if needed.//		In particular, once generated, it will be regenerated only if//		new query or data points are generated, or if the requested number//		of true near neighbors or approximate near neighbors has changed.////		To validate fixed-radius searching, we compute two counts, one//		with the original query radius (trueSqRadius) and the other with//		a radius shrunken by the error factor (minSqradius).  We then//		check that the count of points inside the approximate range is//		between these two bounds.  Because fixed-radius search is//		allowed to ignore points within the shrunken radius, we only//		compute exact neighbors within this smaller distance (for we//		cannot guarantee that we will even visit the other points).//------------------------------------------------------------------------void getTrueNN()						// compute true nearest neighbors{	if (stats > SILENT) {		cout << "(Computing true nearest neighbors for validation.  This may take time.)\n";	}												// deallocate existing storage	if (true_nn_idx			!= NULL) delete [] true_nn_idx;	if (true_dists			!= NULL) delete [] true_dists;	if (min_pts_in_range	!= NULL) delete [] min_pts_in_range;	if (max_pts_in_range	!= NULL) delete [] max_pts_in_range;	if (true_nn > data_size) {					// can't get more nn than points		true_nn = data_size;	}												// allocate true answer storage	true_nn_idx = new ANNidx[true_nn*query_size];	true_dists  = new ANNdist[true_nn*query_size];	min_pts_in_range = new int[query_size];	max_pts_in_range = new int[query_size];	ANNidxArray  curr_nn_idx = true_nn_idx;		// current locations in arrays	ANNdistArray curr_dists = true_dists;												// allocate search structure	ANNbruteForce *the_brute = new ANNbruteForce(data_pts, data_size, dim);												// compute nearest neighbors	for (int i = 0; i < query_size; i++) {		if (radius_bound == 0) {				// standard kNN search			the_brute->annkSearch(				// compute true near neighbors						query_pts[i],			// query point						true_nn,				// number of nearest neighbors						curr_nn_idx,			// where to put indices						curr_dists);			// where to put distances		}		else {									// fixed radius kNN search												// search radii limits			ANNdist trueSqRadius = ANN_POW(radius_bound);			ANNdist minSqRadius = ANN_POW(radius_bound / (1+epsilon));			min_pts_in_range[i] = the_brute->annkFRSearch(						query_pts[i],			// query point						minSqRadius,			// shrunken search radius						true_nn,				// number of near neighbors						curr_nn_idx,			// nearest neighbors (returned)						curr_dists);			// distance (returned)			max_pts_in_range[i] = the_brute->annkFRSearch(						query_pts[i],			// query point						trueSqRadius,			// true search radius						0, NULL, NULL);			// (ignore kNN info)		}		curr_nn_idx += true_nn;					// increment nn index pointer		curr_dists  += true_nn;					// increment nn dist pointer	}	delete the_brute;							// delete brute-force struct	valid_dirty = ANNfalse;						// validation good for now}//------------------------------------------------------------------------//	doValidation//		Compares the approximate answers to the k-nearest neighbors//		against the true nearest neighbors (computed earlier). It is//		assumed that the true nearest neighbors and indices have been//		computed earlier.////		First, we check that all the results are within their allowed//		limits, and generate an internal error, if not.  For the sake of//		performance evaluation, we also compute the following two//		quantities for nearest neighbors:////		Average Error//		-------------//		The relative error between the distance to a reported nearest//		neighbor and the true nearest neighbor (of the same rank),////		Rank Error//		----------//		The difference in rank between the reported nearest neighbor and//		its position (if any) among the true nearest neighbors.  If we//		cannot find this point among the true nearest neighbors, then//		it assumed that the rank of the true nearest neighbor is true_nn+1.////		Because of the possibility of duplicate distances, this is computed//		as follows.  For the j-th reported nearest neighbor, we count the//		number of true nearest neighbors that are at least this close.  Let//		this be rnk.  Then the rank error is max(0, j-rnk).  (In the code//		below, j is an array index and so the first item is 0, not 1.  Thus//		we take max(0, j+1-rnk) instead.)////		For the results of fixed-radious range count, we verify that the//		reported number of points in the range lies between the actual//		number of points in the shrunken and the true search radius.//------------------------------------------------------------------------void doValidation()						// perform validation{	int*		  curr_apx_idx = apx_nn_idx;	// approx index pointer	ANNdistArray  curr_apx_dst = apx_dists;		// approx distance pointer	int*		  curr_tru_idx = true_nn_idx;	// true index pointer	ANNdistArray  curr_tru_dst = true_dists;	// true distance pointer	int i, j;	if (true_nn < near_neigh) {		Error("Cannot validate with fewer true near neighbors than actual", ANNabort);	}	for (i = 0; i < query_size; i++) {			// validate each query		//----------------------------------------------------------------		//	Compute result errors		//		In fixed radius search it is possible that not all k		//		nearest neighbors were computed.  Because the true		//		results are computed over the shrunken radius, we should		//		have at least as many true nearest neighbors as		//		approximate nearest neighbors. (If not, an infinite		//		error will be generated, and so an internal error will		//		will be generated.		//		//		Because nearest neighbors are sorted in increasing order		//		of distance, as soon as we see a null index, we can		//		terminate the distance checking.  The error in the		//		result should not exceed epsilon.  However, if		//		max_pts_visit is nonzero (meaning that the search is		//		terminated early) this might happen.		//----------------------------------------------------------------		for (j = 0; j < near_neigh; j++) {			if (curr_tru_idx[j] == ANN_NULL_IDX)// no more true neighbors?				break;												// true i-th smallest distance			double true_dist = ANN_ROOT(curr_tru_dst[j]);												// reported i-th smallest			double rept_dist = ANN_ROOT(curr_apx_dst[j]);												// better than optimum?			if (rept_dist < true_dist*(1-ERR)) {				Error("INTERNAL ERROR: True nearest neighbor incorrect",						ANNabort);			}			double resultErr;					// result error			if (true_dist == 0.0) {				// let's not divide by zero				if (rept_dist != 0.0) resultErr = ANN_DBL_MAX;				else				  resultErr = 0.0;			}			else {				resultErr = (rept_dist - true_dist) / ((double) true_dist);			}			if (resultErr > epsilon && max_pts_visit == 0) {				Error("INTERNAL ERROR: Actual error exceeds epsilon",						ANNabort);			}			#ifdef ANN_PERF				ann_average_err += resultErr;	// update statistics error			#endif		}		//--------------------------------------------------------------------		//  Compute rank errors (only needed for perf measurements)		//--------------------------------------------------------------------		#ifdef ANN_PERF			for (j = 0; j < near_neigh; j++) {				if (curr_tru_idx[i] == ANN_NULL_IDX) // no more true neighbors?					break;				double rnkErr = 0.0;			// rank error												// reported j-th distance				ANNdist rept_dist = curr_apx_dst[j];				int rnk = 0;					// compute rank of this item				while (rnk < true_nn && curr_tru_dst[rnk] <= rept_dist)					rnk++;				if (j+1-rnk > 0) rnkErr = (double) (j+1-rnk);				ann_rank_err += rnkErr;			// update average rank error			}		#endif		//----------------------------------------------------------------		//	Check range counts from fixed-radius query		//----------------------------------------------------------------		if (radius_bound != 0) {				// fixed-radius search			if  (apx_pts_in_range[i] < min_pts_in_range[i] ||				 apx_pts_in_range[i] > max_pts_in_range[i])				Error("INTERNAL ERROR: Invalid fixed-radius range count",						ANNabort);		}		curr_apx_idx += near_neigh;		curr_apx_dst += near_neigh;		curr_tru_idx += true_nn;				// increment current pointers		curr_tru_dst += true_nn;	}}//----------------------------------------------------------------------//	treeStats//		Computes a number of statistics related to kd_trees and//		bd_trees.  These statistics are printed if in verbose mode,//		and otherwise they are only printed if they are deemed to//		be outside of reasonable operating bounds.//----------------------------------------------------------------------#define log2(x) (log(x)/log(2.0))				// log base 2void treeStats(	ostream		&out,							// output stream	ANNbool		verbose)						// print stats{	const int	MIN_PTS		= 20;				// min no. pts for checking	const float MAX_FRAC_TL = 0.50;				// max frac of triv leaves	const float MAX_AVG_AR	= 20;				// max average aspect ratio	ANNkdStats st;								// statistics structure	the_tree->getStats(st);						// get statistics												// total number of nodes	int n_nodes = st.n_lf + st.n_spl + st.n_shr;												// should be O(n/bs)	int opt_n_nodes = (int) (2*(float(st.n_pts)/st.bkt_size));	int too_many_nodes = 10*opt_n_nodes;	if (st.n_pts >= MIN_PTS && n_nodes > too_many_nodes) {		out << "-----------------------------------------------------------\n";		out << "Warning: The tree has more than 10x as many nodes as points.\n";		out << "You may want to consider a different split or shrink method.\n";		out << "-----------------------------------------------------------\n";		verbose = ANNtrue;	}												// fraction of trivial leaves	float frac_tl = (st.n_lf == 0 ? 0 : ((float) st.n_tl)/ st.n_lf);	if (st.n_pts >= MIN_PTS && frac_tl > MAX_FRAC_TL) {		out << "-----------------------------------------------------------\n";		out << "Warning: A significant fraction of leaves contain no points.\n";		out << "You may want to consider a different split or shrink method.\n";		out << "-----------------------------------------------------------\n";		verbose = ANNtrue;	}												// depth should be O(dim*log n)	int too_many_levels = (int) (2.0 * st.dim * log2((double) st.n_pts));	int opt_levels = (int) log2(double(st.n_pts)/st.bkt_size);	if (st.n_pts >= MIN_PTS && st.depth > too_many_levels) {		out << "-----------------------------------------------------------\n";		out << "Warning: The tree is more than 2x as deep as (dim*log n).\n";		out << "You may want to consider a different split or shrink method.\n";		out << "-----------------------------------------------------------\n";		verbose = ANNtrue;	}												// average leaf aspect ratio	if (st.n_pts >= MIN_PTS && st.avg_ar > MAX_AVG_AR) {		out << "-----------------------------------------------------------\n";		out << "Warning: Average aspect ratio of cells is quite large.\n";		out << "This may slow queries depending on the point distribution.\n";		out << "-----------------------------------------------------------\n";		verbose = ANNtrue;	}	//------------------------------------------------------------------	//  Print summaries if requested	//------------------------------------------------------------------	if (verbose) {								// output statistics		out << "  (Structure Statistics:\n";		out << "    n_nodes          = " << n_nodes			<< " (opt = " << opt_n_nodes			<< ", best if < " << too_many_nodes << ")\n"			<< "        n_leaves     = " << st.n_lf			<< " (" << st.n_tl << " contain no points)\n"			<< "        n_splits     = " << st.n_spl << "\n"			<< "        n_shrinks    = " << st.n_shr << "\n";		out << "    empty_leaves     = " << frac_tl*100			<< " percent (best if < " << MAX_FRAC_TL*100 << " percent)\n";		out << "    depth            = " << st.depth			<< " (opt = " << opt_levels			<< ", best if < " << too_many_levels << ")\n";		out << "    avg_aspect_ratio = " << st.avg_ar			<< " (best if < " << MAX_AVG_AR << ")\n";		out << "  )\n";	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -