📄 mixturepdf.hpp
字号:
#ifndef INDII_ML_AUX_MIXTUREPDF_HPP#define INDII_ML_AUX_MIXTUREPDF_HPP#include "Pdf.hpp"#include <vector>namespace indii { namespace ml { namespace aux { /** * Mixture probability density. * * @author Lawrence Murray <lawrence@indii.org> * @version $Rev: 571 $ * @date $Date: 2008-09-24 15:06:22 +0100 (Wed, 24 Sep 2008) $ * * @param T Component type, should be derivative of Pdf. * * @section MixturePdf_serialization Serialization * * This class supports serialization through the Boost.Serialization library. * * @section MixturePdf_parallel Parallelisation * * This class supports distributed storage. MixturePdf objects may be * instantiated on all nodes of a communicator, each with a different * set of mixture components, such that the union of all components * defines the full distribution. * * "Regular" methods such as getSize(), getTotalWeight() and * getExpectation() use only those components stored on the local * node. Special "distributed" methods such as * getDistributedSize(), getDistributedTotalWeight() and * getDistributedExpectation() use all components stored across the * nodes. */template <class T>class MixturePdf : public Pdf {public: /** * Default constructor. * * Initialises the mixture with zero dimensions. This should * generally only be used when the object is to be restored from a * serialization. Indeed, there is no other way to resize the * mixture to nonzero dimensionality except by subsequently * restoring from a serialization. */ MixturePdf(); /** * Constructor. One or more components should be added with * add() after construction. * * @param N Dimensionality of the distribution. */ MixturePdf(const unsigned int N); /** * Destructor. */ virtual ~MixturePdf(); /** * Assignment operator. Both sides must have the same dimensionality, * but may have different number of components. */ MixturePdf<T>& operator=(const MixturePdf<T>& o); virtual void setDimensions(const unsigned int N, const bool preserve = false); /** * @name Local storage methods * * Use these methods if: * * @li You are not working in a parallel environment, * @li You are working in a parallel environment but are not using * the distributed storage features of this class, or * @li You are working in a parallel environment and are using the * distributed storage features of this class, but only want to deal * with the mixture components stored on the local node. */ //@{ /** * Add a component to the distribution on the local node. * * @param x The component. * @param w Unnormalised weight of the component. * * The new component is added to the end of the list of components in * terms of indices used by get(), getWeight(), etc. */ void add(const T& x, const double w = 1.0); /** * Get component on the local node. * * @param i Index of the component. * * @return The @p i th component. */ const T& get(const unsigned int i) const; /** * Get component on the local node. * * @param i Index of the component. * * @return The @p i th component. * * @note Modifying any of the components in the mixture may have * unintended side effects, especially since Pdf classes rely heavily on * precalculations which may consequently become out of date. This method * is provided only for those situations where precalculations must be * made within the component objects themselves, such as within their * getExpectation() methods, such that a non-const context is required. * For all other situations, favour the const version of this method. */ T& get(const unsigned int i); /** * Set component on the local node. * * @param i Index of the component. * @param x Value of the component. */ void set(const unsigned int i, const T&); /** * Get components on the local node. * * @return \f$\{(\mathbf{x}^{(i)},w^{(i)})\}\f$; set of weighted * components defining the distribution, as a vector. */ const std::vector<T>& getAll() const; /** * Get components on the local node. * * @return \f$\{(\mathbf{x}^{(i)},w^{(i)})\}\f$; set of weighted * components defining the distribution, as a vector. * * @note Modifying any of the components in the mixture may have * unintended side effects, especially since Pdf classes rely heavily on * precalculations which may consequently become out of date. This method * is provided only for those situations where precalculations must be * made within the component objects themselves, such as within their * getExpectation() methods, such that a non-const context is required. * For all other situations, favour the const version of this method. */ std::vector<T>& getAll(); /** * Get component weight on the local node. * * @param i Index of the component. * * @return Weight of the @p i th component. */ double getWeight(const unsigned int i) const; /** * Set component weight on the local node. * * @param i Index of the component. * @param w Weight of the @p i th component. */ void setWeight(const unsigned int i, const double w); /** * Get the weights of all components on the local node. * * @return Vector of the weights of all components. */ const vector& getWeights() const; /** * Set the weights of all components on the local node. * * @param ws Vector of the weights of all components. */ void setWeights(const vector& ws); /** * Clear all components from the distribution on the local node. */ void clear(); /** * Get the number of components in the distribution on the local * node. * * @return \f$K\f$; the number of components in the distribution. */ unsigned int getSize() const; /** * Get the total weight of components on the local node. * * @return \f$W\f$; the total weight of components. */ double getTotalWeight() const; /** * Normalise weights on the local node to sum to 1. * * @warning If in a distributed storage situation this is probably * not what you want. Consider distributedNormalise() * instead. Normalising the weights only on the local node will skew * the weighting of mixture components across all nodes. */ void normalise(); /** * Sample from the distribution on the local node. * * @return A sample from the distribution. */ virtual vector sample(); /** * Calculate the density on the local node at a given point. * * \f[p(\mathbf{x}) = \frac{1}{W}\sum_{i=1}^{K}w^{(i)}p^{(i)}(\mathbf{x})\f] * * @param x \f$\mathbf{x}\f$; the point at which to calculate the * density. * * @return \f$p(\mathbf{x})\f$; the density at \f$\mathbf{x}\f$. */ virtual double densityAt(const vector& x); /** * Get the expected value of the distribution on the local node. * * @return \f$\mathbf{\mu}\f$; expected value of the distribution. */ virtual const vector& getExpectation(); //@} /** * @name Distributed storage methods * * Use these methods if: * * @li You are working in a parallel environment and are using the * distributed storage features of this class, and want to deal with * the mixture components stored across all nodes. * * These methods are used for distributed storage of mixtures. They * require synchronization and communication between all nodes in * the communicator, such that if any of these methods is called on * one node, the same method should eventually, and preferably as * soon as possible, be called on all other nodes in the same * communicator. */ //@{ /** * Get the number of components in the distribution across all * nodes. * * @return \f$\sum_i K_i\f$; the number of components in the distribution. */ unsigned int getDistributedSize() const; /** * Get the total weight of components across all nodes. * * @return \f$\sum_i W_i\f$; the total weight of components across * all nodes. */ double getDistributedTotalWeight() const; /** * Normalise weights across all nodes to sum to 1. */ void distributedNormalise(); /** * Sample from the full distribution. * * @return A sample from the full distribution. */ virtual vector distributedSample(); /** * Draw multiple samples from the full distribution. This is * significantly more efficient than multiple calls to * distributedSample(), as it involves less message passing. * * @param P \f$P\f$; number of samples to draw. * * @return Vector of samples drawn on the local node. The number of * samples drawn across all nodes will total \f$P\f$. */ virtual std::vector<vector> distributedSample(const unsigned int P); /** * Calculate the density of the full distribution at a given point. * * @param x \f$\mathbf{x}\f$; the point at which to calculate the * density. * * @return \f$p(\mathbf{x})\f$; the density at \f$\mathbf{x}\f$. */ virtual double distributedDensityAt(const vector& x); /** * Perform multiple density calculations from the full distribution. This * is significantly more efficient than multiple calls to * distributedDensityAt(const vector&), as it involves less message * passing. * * @param xs The points on this node at which to calculate densities. * * @return The densities at the given points on this node. * * Note that while each node is passed only its set of points and returns * only the density calculations for its set of points, all nodes * participate in the calculation for all points. * * @see distributedDensityAt(const vector&); */ virtual vector distributedDensityAt(std::vector<vector>& xs); /** * Get the expected value of the full distribution. * * @return \f$\mathbf{\mu}\f$; expected value of the full * distribution. */ virtual vector getDistributedExpectation(); /** * Redistribute components across nodes by number. This attempts to * redistribute the components of the full distribution across * nodes, so that each node stores as close to an equal number of * components as possible. */ void redistributeBySize(); /** * Redistribute components across nodes by weight. This attempts to * redistribute the components of the full distribution across * nodes, so that the total weight of components at each node is as * close to an equal number as possible. */ void redistributeByWeight(); //@}protected: /** * Called when changes are made to the distribution, such as when a * new component is added. This allows pre-calculations to be * refreshed. */ virtual void dirty();private: /** * Node property. */ template <class P> struct node_property { /** * Node rank. */ unsigned int rank; /** * Property at node. */ P prop; /** * Comparison operator for sorting. * * @return True if this node's property is greater than the * argument's property. */ bool operator<(const node_property& o) const; /** * Serialize, or restore from serialization. */ template<class Archive> void serialize(Archive& ar, const unsigned int version); /* * Boost.Serialization requirements. */ friend class boost::serialization::access; }; typedef struct node_property<int> node_count; typedef std::vector<node_count> node_count_vector; typedef typename node_count_vector::iterator node_count_iterator; typedef struct node_property<double> node_weight; typedef std::vector<node_weight> node_weight_vector; typedef typename node_weight_vector::iterator node_weight_iterator; /** * Components. */ std::vector<T> xs; /** * Component weights. */ indii::ml::aux::vector ws; /** * Cumulative component weights. */ std::vector<double> Ws; /** * Last calculated expectation. */ vector mu; /** * Last calculated unnormalised expectation. */ vector Zmu; /** * Is the last calculated expectation up to date? */ bool haveMu; /** * Calculate expectation from current components on the local node. */ void calculateExpectation(); /** * Serialize. */ template<class Archive> void save(Archive& ar, const unsigned int version) const; /** * Restore from serialization. */ template<class Archive> void load(Archive& ar, const unsigned int version); /* * Boost.Serialization requirements. */ BOOST_SERIALIZATION_SPLIT_MEMBER() friend class boost::serialization::access;}; } }}#include "Random.hpp"#include "parallel.hpp"#include "boost/serialization/base_object.hpp"#include "boost/serialization/vector.hpp"#include "boost/mpi.hpp"#include "boost/mpi/environment.hpp"#include "boost/mpi/communicator.hpp"#include <algorithm>template <class T>indii::ml::aux::MixturePdf<T>::MixturePdf() : xs(0), ws(0), Ws(0), mu(0), Zmu(0) { haveMu = false;}template <class T>indii::ml::aux::MixturePdf<T>::MixturePdf(const unsigned int N) : Pdf(N), xs(0), ws(0), Ws(0), mu(N), Zmu(N) { haveMu = false;}template <class T>indii::ml::aux::MixturePdf<T>::~MixturePdf() { //}template <class T>indii::ml::aux::MixturePdf<T>& indii::ml::aux::MixturePdf<T>::operator=( const MixturePdf<T>& o) { /* pre-condition */ assert (N == o.N); xs = o.xs; ws.resize(o.ws.size(), false); ws = o.ws; Ws.resize(o.Ws.size(), false); Ws = o.Ws; haveMu = o.haveMu; if (haveMu) { mu = o.mu; Zmu = o.Zmu; } return *this;}template <class T>void indii::ml::aux::MixturePdf<T>::add(const T& x, const double w) { /* pre-condition */ assert (x.getDimensions() == getDimensions()); /* component */ xs.push_back(x); /* weight */ ws.resize(ws.size() + 1, true); ws(ws.size() - 1) = w; /* cumulative weight */ Ws.push_back(getTotalWeight() + w); dirty(); /* post-condition */ assert (xs.size() == ws.size()); assert (xs.size() == Ws.size());}template <class T>inline const T& indii::ml::aux::MixturePdf<T>::get(const unsigned int i) const { return xs[i];}template <class T>inline T& indii::ml::aux::MixturePdf<T>::get(const unsigned int i) { return xs[i];}template <class T>void indii::ml::aux::MixturePdf<T>::set(const unsigned int i, const T& x) { xs[i] = x; dirty();}template <class T>inline const std::vector<T>& indii::ml::aux::MixturePdf<T>::getAll() const { return xs;}template <class T>inline std::vector<T>& indii::ml::aux::MixturePdf<T>::getAll() { return xs;}template <class T>inline double indii::ml::aux::MixturePdf<T>::getWeight( const unsigned int i) const { return ws(i);}template <class T>void indii::ml::aux::MixturePdf<T>::setWeight(const unsigned int i, const double w) { /* pre-condition */ assert (i < getSize()); this->ws(i) = w; /* recalculate cumulative weights */ unsigned int j = i; if (j == 0) { Ws[0] = ws(0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -