📄 kde.hpp
字号:
#ifndef INDII_ML_AUX_KDE_HPP
#define INDII_ML_AUX_KDE_HPP

#include "PartitionTree.hpp"

/**
 * @file kde.hpp
 *
 * Provides convenience methods for working with kernel density
 * approximations.
 */
namespace indii {
namespace ml {
namespace aux {

/**
 * Calculate \f$h_{opt}\f$.
 *
 * @param N Number of dimensions.
 * @param P Number of samples.
 *
 * Note:
 *
 * \f[
 * h_{opt} = \left[\frac{4}{(N+2)P}\right]^{\frac{1}{N+4}}\,,
 * \f]
 *
 * this being the optimal bandwidth for a kernel density estimate
 * over \f$P\f$ samples from a standard \f$N\f$-dimensional Gaussian
 * distribution, and Gaussian kernel (@ref Silverman1986
 * "Silverman, 1986"). We find this useful as a rule of thumb for
 * setting kernel density estimate bandwidths.
 *
 * @section hopt_references References
 *
 * @anchor Silverman1986
 * Silverman, B.W. <i>Density Estimation for Statistics and Data
 * Analysis</i>. Chapman and Hall, <b>1986</b>.
 */
double hopt(const unsigned int N, const unsigned int P);

/**
 * Dual-tree kernel density evaluation.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param queryTree Query tree.
 * @param targetTree Target tree.
 * @param w Weight vector.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Vector of the density estimates for each of the points in
 * queryTree, ordered according to its underlying data.
 */
template <class NT, class KT, class PT>
vector dualTreeDensity(PT& queryTree, PT& targetTree, const vector& w,
    const NT& N, const KT& K, const bool normalise = true);

/**
 * Distributed dual-tree kernel density evaluation.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param queryTree Query tree.
 * @param targetTree Target tree.
 * @param w Weight vector.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Vector of the density estimates for each of the points in
 * queryTree, ordered according to its underlying data. Note that while
 * only the results for the local query components are returned, all
 * nodes participate in the evaluation.
 *
 * Note that queryTree and targetTree should have different underlying
 * DiracMixturePdf objects.
 */
template <class NT, class KT, class PT>
vector distributedDualTreeDensity(PT& queryTree, PT& targetTree,
    const vector& w, const NT& N, const KT& K, const bool normalise = true);

/**
 * Self-tree kernel density evaluation.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param tree Tree.
 * @param w Weight vector.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Vector of the density estimates for each of the points in
 * tree, ordered according to its underlying data.
 */
template <class NT, class KT, class PT>
vector selfTreeDensity(PT& tree, const vector& w, const NT& N, const KT& K,
    const bool normalise = true);

/**
 * Distributed self-tree kernel density evaluation.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param tree Tree.
 * @param w Weight vector.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Vector of the density estimates for each of the points in
 * tree, ordered according to its underlying data. Note that while
 * only the results for the local query components are returned, all
 * nodes participate in the evaluation.
 */
template <class NT, class KT, class PT>
vector distributedSelfTreeDensity(PT& tree, const vector& w, const NT& N,
    const KT& K, const bool normalise = true);

/**
 * Dual-tree kernel density evaluation with multiple mixture model
 * weights.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param queryTree Query tree.
 * @param targetTree Target tree.
 * @param ws Weight matrix. Each row gives a weight vector for one mixture
 * model.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Matrix where each row provides the density estimates for each
 * of the points in queryTree, ordered according to its underlying data, and
 * using the weights of the corresponding row in ws. The weights
 * underlying queryTree and targetTree are ignored. Optimisations are
 * made in the case that the queryTree and targetTree are identical and
 * over the same underlying data.
 */
template <class NT, class KT, class PT>
matrix dualTreeDensity(PT& queryTree, PT& targetTree, const matrix& ws,
    const NT& N, const KT& K, const bool normalise = true);

/**
 * Distributed dual-tree kernel density evaluation with multiple mixture
 * model weights.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param queryTree Query tree.
 * @param targetTree Target tree.
 * @param ws Weight matrix. Each row gives a weight vector for one mixture
 * model.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Matrix where each row provides the density estimates for each
 * of the points in queryTree, ordered according to its underlying data, and
 * using the weights of the corresponding row in ws. The weights
 * underlying queryTree and targetTree are ignored.
 *
 * Note that queryTree and targetTree should have different underlying
 * DiracMixturePdf objects.
 */
template <class NT, class KT, class PT>
matrix distributedDualTreeDensity(PT& queryTree, PT& targetTree,
    const matrix& ws, const NT& N, const KT& K, const bool normalise = true);

/**
 * Self-tree kernel density evaluation with multiple mixture model
 * weights.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param tree Tree.
 * @param ws Weight matrix. Each row gives a weight vector for one mixture
 * model.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Matrix where each row provides the density estimates for each
 * of the points in tree, ordered according to its underlying data, and
 * using the weights of the corresponding row in ws. The weights underlying
 * tree are ignored.
 */
template <class NT, class KT, class PT>
matrix selfTreeDensity(PT& tree, const matrix& ws, const NT& N, const KT& K,
    const bool normalise = true);

/**
 * Distributed self tree kernel density evaluation with multiple mixture
 * model weights.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param tree Tree.
 * @param ws Weight matrix. Each row gives a weight vector for one mixture
 * model.
 * @param N Norm.
 * @param K Kernel.
 * @param normalise Normalise results.
 *
 * @return Matrix where each row provides the density estimates for each
 * of the points in tree, ordered according to its underlying data, and
 * using the weights of the corresponding row in ws. The weights underlying
 * tree are ignored.
 */
template <class NT, class KT, class PT>
matrix distributedSelfTreeDensity(PT& tree, const matrix& ws, const NT& N,
    const KT& K, const bool normalise = true);

/**
 * Cross-tree kernel density evaluation with multiple mixture model
 * weights. Simultaneously performs kernel density estimation of two
 * trees to each other.
 *
 * @param NT Norm type.
 * @param KT Kernel type.
 * @param PT Partition tree type.
 *
 * @param tree1 First tree.
 * @param tree2 Second tree.
 * @param ws1 Weight matrix for first tree as target. Each row gives a
 * weight vector for one mixture model.
 * @param ws2 Weight matrix for second tree as target. Each row gives a
 * weight vector for one mixture model.
 * @param N Norm.
 * @param K Kernel.
 * @param result1 On return, unnormalised result for first tree as query,
 * second as target, is added to this.
 * @param result2 On return, unnormalised result for second tree as query,
 * first as target, is added to this.
 * @param clear Clear result matrices before addition.
* @param normalise Normalise results. * * Intended mainly for internal use. */template <class NT, class KT, class PT>void crossTreeDensity(PT& tree1, PT& tree2, const matrix& ws1, const matrix& ws2, const NT& N, const KT& K, matrix& result1, matrix& result2, const bool clear = true, const bool normalise = true); } }}#include "DiracMixturePdf.hpp"#include "KDTree.hpp"#include "vector.hpp"#include "matrix.hpp"#include <stack>inline double indii::ml::aux::hopt(const unsigned int N, const unsigned int P) { return pow(4.0/((N+2)*P), 1.0/(N+4.0));}template <class NT, class KT, class PT>indii::ml::aux::vector indii::ml::aux::dualTreeDensity( PT& queryTree, PT& targetTree, const vector& w, const NT& N, const KT& K, const bool normalise) { aux::matrix ws(1,w.size()); row(ws,0) = w; return row(dualTreeDensity(queryTree, targetTree, ws, N, K, normalise), 0);}template <class NT, class KT, class PT>indii::ml::aux::vector indii::ml::aux::distributedDualTreeDensity( PT& queryTree, PT& targetTree, const vector& w, const NT& N, const KT& K, const bool normalise) { aux::matrix ws(1,w.size()); row(ws,0) = w; return row(distributedDualTreeDensity(queryTree, targetTree, ws, N, K, normalise), 0);}template <class NT, class KT, class PT>indii::ml::aux::vector indii::ml::aux::selfTreeDensity( PT& tree, const vector& w, const NT& N, const KT& K, const bool normalise) { aux::matrix ws(1,w.size()); row(ws,0) = w; return row(selfTreeDensity(tree, ws, N, K, normalise), 0);}template <class NT, class KT, class PT>indii::ml::aux::vector indii::ml::aux::distributedSelfTreeDensity( PT& tree, const vector& w, const NT& N, const KT& K, const bool normalise) { aux::matrix ws(1,w.size()); row(ws,0) = w; return row(distributedSelfTreeDensity(tree, ws, N, K, normalise), 0);}template <class NT, class KT, class PT>indii::ml::aux::matrix indii::ml::aux::dualTreeDensity( PT& queryTree, PT& targetTree, const matrix& ws, const NT& N, const KT& K, const bool normalise) { /* pre-condition */ assert (ws.size2() == 
targetTree.getData()->getSize()); assert (queryTree.getData()->getDimensions() == targetTree.getData()->getDimensions()); DiracMixturePdf& q = *queryTree.getData(); DiracMixturePdf& p = *targetTree.getData(); PartitionTreeNode* queryRoot = queryTree.getRoot(); PartitionTreeNode* targetRoot = targetTree.getRoot(); matrix result(ws.size1(), q.getSize()); result.clear(); if (queryRoot != NULL && targetRoot != NULL) { std::stack<PartitionTreeNode*> queryNodes, targetNodes; vector x(p.getDimensions()); unsigned int i, j; double w, d; queryNodes.push(queryRoot); targetNodes.push(targetRoot); while (!queryNodes.empty()) { PartitionTreeNode& queryNode = *queryNodes.top(); queryNodes.pop(); PartitionTreeNode& targetNode = *targetNodes.top(); targetNodes.pop(); if (queryNode.isLeaf() && targetNode.isLeaf()) { i = queryNode.getIndex(); j = targetNode.getIndex(); noalias(x) = q.get(i) - p.get(j); d = K(N(x)); noalias(column(result,i)) += d * column(ws,j); } else if (queryNode.isLeaf() && targetNode.isPrune()) { i = queryNode.getIndex(); const std::vector<unsigned int>& js = targetNode.getIndices(); for (j = 0; j < js.size(); j++) { noalias(x) = q.get(i) - p.get(js[j]); d = K(N(x)); noalias(column(result,i)) += d * column(ws,js[j]); } } else if (queryNode.isPrune() && targetNode.isLeaf()) { const std::vector<unsigned int>& is = queryNode.getIndices(); j = targetNode.getIndex(); for (i = 0; i < is.size(); i++) { noalias(x) = q.get(is[i]) - p.get(j); d = K(N(x)); noalias(column(result,is[i])) += d * column(ws,j); } } else if (queryNode.isPrune() && targetNode.isPrune()) { const std::vector<unsigned int>& is = queryNode.getIndices(); const std::vector<unsigned int>& js = targetNode.getIndices(); for (i = 0; i < is.size(); i++) { for (j = 0; j < js.size(); j++) { noalias(x) = q.get(is[i]) - p.get(js[j]); d = K(N(x)); noalias(column(result,is[i])) += d * column(ws,js[j]); } } } else { /* should we recurse? */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -