binomial.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 725 行 · 第 1/2 页

HPP
725
字号
// boost\math\distributions\binomial.hpp// Copyright John Maddock 2006.// Copyright Paul A. Bristow 2007.// Use, modification and distribution are subject to the// Boost Software License, Version 1.0.// (See accompanying file LICENSE_1_0.txt// or copy at http://www.boost.org/LICENSE_1_0.txt)// http://en.wikipedia.org/wiki/binomial_distribution// Binomial distribution is the discrete probability distribution of// the number (k) of successes, in a sequence of// n independent (yes or no, success or failure) Bernoulli trials.// It expresses the probability of a number of events occurring in a fixed time// if these events occur with a known average rate (probability of success),// and are independent of the time since the last event.// The number of cars that pass through a certain point on a road during a given period of time.// The number of spelling mistakes a secretary makes while typing a single page.// The number of phone calls at a call center per minute.// The number of times a web server is accessed per minute.// The number of light bulbs that burn out in a certain amount of time.// The number of roadkill found per unit length of road// http://en.wikipedia.org/wiki/binomial_distribution// Given a sample of N measured values k[i],// we wish to estimate the value of the parameter x (mean)// of the binomial population from which the sample was drawn.// To calculate the maximum likelihood value = 1/N sum i = 1 to N of k[i]// Also may want a function for EXACTLY k.// And probability that there are EXACTLY k occurrences is// exp(-x) * pow(x, k) / factorial(k)// where x is expected occurrences (mean) during the given interval.// For example, if events occur, on average, every 4 min,// and we are interested in number of events occurring in 10 min,// then x = 10/4 = 2.5// http://www.itl.nist.gov/div898/handbook/eda/section3/eda366i.htm// The binomial distribution is used when there are// exactly two mutually exclusive outcomes of a trial.// These outcomes are appropriately labeled "success" and "failure".// The binomial distribution is used to obtain// the probability of observing x successes in N trials,// with the probability of success on a single trial denoted by p.// The binomial distribution assumes that p is fixed for all trials.// P(x, p, n) = n!/(x! * (n-x)!) * p^x * (1-p)^(n-x)// http://mathworld.wolfram.com/BinomialCoefficient.html// The binomial coefficient (n; k) is the number of ways of picking// k unordered outcomes from n possibilities,// also known as a combination or combinatorial number.// The symbols _nC_k and (n; k) are used to denote a binomial coefficient,// and are sometimes read as "n choose k."// (n; k) therefore gives the number of k-subsets  possible out of a set of n distinct items.// For example://  The 2-subsets of {1,2,3,4} are the six pairs {1,2}, {1,3}, {1,4}, {2,3}, {2,4}, and {3,4}, so (4; 2)==6.// http://functions.wolfram.com/GammaBetaErf/Binomial/ for evaluation.// But note that the binomial distribution// (like others including the poisson, negative binomial & Bernoulli)// is strictly defined as a discrete function: only integral values of k are envisaged.// However because of the method of calculation using a continuous gamma function,// it is convenient to treat it as if a continous function,// and permit non-integral values of k.// To enforce the strict mathematical model, users should use floor or ceil functions// on k outside this function to ensure that k is integral.#ifndef BOOST_MATH_SPECIAL_BINOMIAL_HPP#define BOOST_MATH_SPECIAL_BINOMIAL_HPP#include <boost/math/distributions/fwd.hpp>#include <boost/math/special_functions/beta.hpp> // for incomplete beta.#include <boost/math/distributions/complement.hpp> // complements#include <boost/math/distributions/detail/common_error_handling.hpp> // error checks#include <boost/math/distributions/detail/inv_discrete_quantile.hpp> // error checks#include <boost/math/special_functions/fpclassify.hpp> // isnan.#include <boost/math/tools/roots.hpp> // for root finding.#include <utility>namespace boost{  namespace math  {     template <class RealType, class Policy>     class binomial_distribution;     namespace binomial_detail{        // common error checking routines for binomial distribution functions:        template <class RealType, class Policy>        inline bool check_N(const char* function, const RealType& N, RealType* result, const Policy& pol)        {           if((N < 0) || !(boost::math::isfinite)(N))           {               *result = policies::raise_domain_error<RealType>(                  function,                  "Number of Trials argument is %1%, but must be >= 0 !", N, pol);               return false;           }           return true;        }        template <class RealType, class Policy>        inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)        {           if((p < 0) || (p > 1) || !(boost::math::isfinite)(p))           {               *result = policies::raise_domain_error<RealType>(                  function,                  "Success fraction argument is %1%, but must be >= 0 and <= 1 !", p, pol);               return false;           }           return true;        }        template <class RealType, class Policy>        inline bool check_dist(const char* function, const RealType& N, const RealType& p, RealType* result, const Policy& pol)        {           return check_success_fraction(              function, p, result, pol)              && check_N(               function, N, result, pol);        }        template <class RealType, class Policy>        inline bool check_dist_and_k(const char* function, const RealType& N, const RealType& p, RealType k, RealType* result, const Policy& pol)        {           if(check_dist(function, N, p, result, pol) == false)              return false;           if((k < 0) || !(boost::math::isfinite)(k))           {               *result = policies::raise_domain_error<RealType>(                  function,                  "Number of Successes argument is %1%, but must be >= 0 !", k, pol);               return false;           }           if(k > N)           {               *result = policies::raise_domain_error<RealType>(                  function,                  "Number of Successes argument is %1%, but must be <= Number of Trials !", k, pol);               return false;           }           return true;        }        template <class RealType, class Policy>        inline bool check_dist_and_prob(const char* function, const RealType& N, RealType p, RealType prob, RealType* result, const Policy& pol)        {           if(check_dist(function, N, p, result, pol) && detail::check_probability(function, prob, result, pol) == false)              return false;           return true;        }         template <class T, class Policy>         T inverse_binomial_cornish_fisher(T n, T sf, T p, T q, const Policy& pol)         {            BOOST_MATH_STD_USING            // mean:            T m = n * sf;            // standard deviation:            T sigma = sqrt(n * sf * (1 - sf));            // skewness            T sk = (1 - 2 * sf) / sigma;            // kurtosis:            // T k = (1 - 6 * sf * (1 - sf) ) / (n * sf * (1 - sf));            // Get the inverse of a std normal distribution:            T x = boost::math::erfc_inv(p > q ? 2 * q : 2 * p, pol) * constants::root_two<T>();            // Set the sign:            if(p < 0.5)               x = -x;            T x2 = x * x;            // w is correction term due to skewness            T w = x + sk * (x2 - 1) / 6;            /*            // Add on correction due to kurtosis.            // Disabled for now, seems to make things worse?            //            if(n >= 10)               w += k * x * (x2 - 3) / 24 + sk * sk * x * (2 * x2 - 5) / -36;               */            w = m + sigma * w;            if(w < tools::min_value<T>())               return sqrt(tools::min_value<T>());            if(w > n)               return n;            return w;         }      template <class RealType, class Policy>      RealType quantile_imp(const binomial_distribution<RealType, Policy>& dist, const RealType& p, const RealType& q)      { // Quantile or Percent Point Binomial function.        // Return the number of expected successes k,        // for a given probability p.        //        // Error checks:        BOOST_MATH_STD_USING  // ADL of std names        RealType result;        RealType trials = dist.trials();        RealType success_fraction = dist.success_fraction();        if(false == binomial_detail::check_dist_and_prob(           "boost::math::quantile(binomial_distribution<%1%> const&, %1%)",           trials,           success_fraction,           p,           &result, Policy()))        {           return result;        }        // Special cases:        //        if(p == 0)        {  // There may actually be no answer to this question,           // since the probability of zero successes may be non-zero,           // but zero is the best we can do:           return 0;        }        if(p == 1)        {  // Probability of n or fewer successes is always one,           // so n is the most sensible answer here:           return trials;        }        if (p <= pow(1 - success_fraction, trials))        { // p <= pdf(dist, 0) == cdf(dist, 0)          return 0; // So the only reasonable result is zero.        } // And root finder would fail otherwise.        // Solve for quantile numerically:        //        RealType guess = binomial_detail::inverse_binomial_cornish_fisher(trials, success_fraction, p, q, Policy());        RealType factor = 8;        if(trials > 100)           factor = 1.01f; // guess is pretty accurate        else if((trials > 10) && (trials - 1 > guess) && (guess > 3))           factor = 1.15f; // less accurate but OK.        else if(trials < 10)        {           // pretty inaccurate guess in this area:           if(guess > trials / 64)           {              guess = trials / 4;              factor = 2;           }           else              guess = trials / 1024;        }        else           factor = 2; // trials largish, but in far tails.        typedef typename Policy::discrete_quantile_type discrete_quantile_type;        boost::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();        return detail::inverse_discrete_quantile(            dist,            p,            q,            guess,            factor,            RealType(1),            discrete_quantile_type(),            max_iter);      } // quantile     }    template <class RealType = double, class Policy = policies::policy<> >    class binomial_distribution    {    public:      typedef RealType value_type;      typedef Policy policy_type;      binomial_distribution(RealType n = 1, RealType p = 0.5) : m_n(n), m_p(p)      { // Default n = 1 is the Bernoulli distribution        // with equal probability of 'heads' or 'tails.         RealType r;         binomial_detail::check_dist(            "boost::math::binomial_distribution<%1%>::binomial_distribution",            m_n,            m_p,            &r, Policy());      } // binomial_distribution constructor.      RealType success_fraction() const      { // Probability.        return m_p;      }      RealType trials() const      { // Total number of trials.        return m_n;      }      enum interval_type{         clopper_pearson_exact_interval,         jeffreys_prior_interval      };      //      // Estimation of the success fraction parameter.      // The best estimate is actually simply successes/trials,      // these functions are used      // to obtain confidence intervals for the success fraction.      //      static RealType find_lower_bound_on_p(         RealType trials,         RealType successes,         RealType probability,         interval_type t = clopper_pearson_exact_interval)      {        static const char* function = "boost::math::binomial_distribution<%1%>::find_lower_bound_on_p";        // Error checks:        RealType result;        if(false == binomial_detail::check_dist_and_k(           function, trials, RealType(0), successes, &result, Policy())            &&           binomial_detail::check_dist_and_prob(           function, trials, RealType(0), probability, &result, Policy()))        { return result; }        if(successes == 0)           return 0;        // NOTE!!! The Clopper Pearson formula uses "successes" not        // "successes+1" as usual to get the lower bound,        // see http://www.itl.nist.gov/div898/handbook/prc/section2/prc241.htm        return (t == clopper_pearson_exact_interval) ? ibeta_inv(successes, trials - successes + 1, probability, static_cast<RealType*>(0), Policy())           : ibeta_inv(successes + 0.5f, trials - successes + 0.5f, probability, static_cast<RealType*>(0), Policy());      }      static RealType find_upper_bound_on_p(         RealType trials,         RealType successes,         RealType probability,         interval_type t = clopper_pearson_exact_interval)      {        static const char* function = "boost::math::binomial_distribution<%1%>::find_upper_bound_on_p";        // Error checks:        RealType result;        if(false == binomial_detail::check_dist_and_k(           function, trials, RealType(0), successes, &result, Policy())            &&           binomial_detail::check_dist_and_prob(           function, trials, RealType(0), probability, &result, Policy()))        { return result; }        if(trials == successes)           return 1;        return (t == clopper_pearson_exact_interval) ? ibetac_inv(successes + 1, trials - successes, probability, static_cast<RealType*>(0), Policy())           : ibetac_inv(successes + 0.5f, trials - successes + 0.5f, probability, static_cast<RealType*>(0), Policy());      }      // Estimate number of trials parameter:      //      // "How many trials do I need to be P% sure of seeing k events?"      //    or      // "How many trials can I have to be P% sure of seeing fewer than k events?"

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?