mirror of
				https://github.com/saitohirga/WSJT-X.git
				synced 2025-11-03 21:40:52 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			374 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			374 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
[section:negative_binomial_dist Negative Binomial Distribution]
 | 
						|
 | 
						|
``#include <boost/math/distributions/negative_binomial.hpp>``
 | 
						|
 | 
						|
   namespace boost{ namespace math{
 | 
						|
 | 
						|
   template <class RealType = double,
 | 
						|
             class ``__Policy``   = ``__policy_class`` >
 | 
						|
   class negative_binomial_distribution;
 | 
						|
 | 
						|
   typedef negative_binomial_distribution<> negative_binomial;
 | 
						|
 | 
						|
   template <class RealType, class ``__Policy``>
 | 
						|
   class negative_binomial_distribution
 | 
						|
   {
 | 
						|
   public:
 | 
						|
      typedef RealType value_type;
 | 
						|
      typedef Policy   policy_type;
 | 
						|
      // Constructor from successes and success_fraction:
 | 
						|
      negative_binomial_distribution(RealType r, RealType p);
 | 
						|
 | 
						|
      // Parameter accessors:
 | 
						|
      RealType success_fraction() const;
 | 
						|
      RealType successes() const;
 | 
						|
 | 
						|
      // Bounds on success fraction:
 | 
						|
      static RealType find_lower_bound_on_p(
 | 
						|
         RealType trials,
 | 
						|
         RealType successes,
 | 
						|
         RealType probability); // alpha
 | 
						|
      static RealType find_upper_bound_on_p(
 | 
						|
         RealType trials,
 | 
						|
         RealType successes,
 | 
						|
         RealType probability); // alpha
 | 
						|
 | 
						|
      // Estimate min/max number of trials:
 | 
						|
      static RealType find_minimum_number_of_trials(
 | 
						|
         RealType k,     // Number of failures.
 | 
						|
         RealType p,     // Success fraction.
 | 
						|
         RealType probability); // Probability threshold alpha.
 | 
						|
      static RealType find_maximum_number_of_trials(
 | 
						|
         RealType k,     // Number of failures.
 | 
						|
         RealType p,     // Success fraction.
 | 
						|
         RealType probability); // Probability threshold alpha.
 | 
						|
   };
 | 
						|
 | 
						|
   }} // namespaces
 | 
						|
 | 
						|
The class type `negative_binomial_distribution` represents a
 | 
						|
[@http://en.wikipedia.org/wiki/Negative_binomial_distribution negative_binomial distribution]:
 | 
						|
it is used when there are exactly two mutually exclusive outcomes of a
 | 
						|
[@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trial]:
 | 
						|
these outcomes are labelled "success" and "failure".
 | 
						|
 | 
						|
For k + r Bernoulli trials each with success fraction p, the
 | 
						|
negative_binomial distribution gives the probability of observing
 | 
						|
k failures and r successes with success on the last trial.
 | 
						|
The negative_binomial distribution
 | 
						|
assumes that success_fraction p is fixed for all (k + r) trials.
 | 
						|
 | 
						|
[note The random variable for the negative binomial distribution is the number of trials,
 | 
						|
(the number of successes is a fixed property of the distribution)
 | 
						|
whereas for the binomial,
 | 
						|
the random variable is the number of successes, for a fixed number of trials.]
 | 
						|
 | 
						|
It has the PDF:
 | 
						|
 | 
						|
[equation neg_binomial_ref]
 | 
						|
 | 
						|
The following graph illustrate how the PDF varies as the success fraction
 | 
						|
/p/ changes:
 | 
						|
 | 
						|
[graph negative_binomial_pdf_1]
 | 
						|
 | 
						|
Alternatively, this graph shows how the shape of the PDF varies as
 | 
						|
the number of successes changes:
 | 
						|
 | 
						|
[graph negative_binomial_pdf_2]
 | 
						|
 | 
						|
[h4 Related Distributions]
 | 
						|
 | 
						|
The name negative binomial distribution is reserved by some to the
 | 
						|
case where the successes parameter r is an integer.
 | 
						|
This integer version is also called the
 | 
						|
[@http://mathworld.wolfram.com/PascalDistribution.html Pascal distribution].
 | 
						|
 | 
						|
This implementation uses real numbers for the computation throughout
 | 
						|
(because it uses the *real-valued* incomplete beta function family of functions).
 | 
						|
This real-valued version is also called the Polya Distribution.
 | 
						|
 | 
						|
The Poisson distribution is a generalization of the Pascal distribution,
 | 
						|
where the success parameter r is an integer: to obtain the Pascal
 | 
						|
distribution you must ensure that an integer value is provided for r,
 | 
						|
and take integer values (floor or ceiling) from functions that return
 | 
						|
a number of successes.
 | 
						|
 | 
						|
For large values of r (successes), the negative binomial distribution
 | 
						|
converges to the Poisson distribution.
 | 
						|
 | 
						|
The geometric distribution is a special case
 | 
						|
where the successes parameter r = 1,
 | 
						|
so only a first and only success is required.
 | 
						|
geometric(p) = negative_binomial(1, p).
 | 
						|
 | 
						|
The Poisson distribution is a special case for large successes
 | 
						|
 | 
						|
poisson([lambda]) = lim [sub r [rarr] [infin]] [space] negative_binomial(r, r / ([lambda] + r)))
 | 
						|
 | 
						|
[discrete_quantile_warning Negative Binomial]
 | 
						|
 | 
						|
[h4 Member Functions]
 | 
						|
 | 
						|
[h5 Construct]
 | 
						|
 | 
						|
   negative_binomial_distribution(RealType r, RealType p);
 | 
						|
 | 
						|
Constructor: /r/ is the total number of successes, /p/ is the
 | 
						|
probability of success of a single trial.
 | 
						|
 | 
						|
Requires: `r > 0` and `0 <= p <= 1`.
 | 
						|
 | 
						|
[h5 Accessors]
 | 
						|
 | 
						|
   RealType success_fraction() const; // successes / trials (0 <= p <= 1)
 | 
						|
 | 
						|
Returns the parameter /p/ from which this distribution was constructed.
 | 
						|
 | 
						|
   RealType successes() const; // required successes (r > 0)
 | 
						|
 | 
						|
Returns the parameter /r/ from which this distribution was constructed.
 | 
						|
 | 
						|
The best method of calculation for the following functions is disputed:
 | 
						|
see __binomial_distrib for more discussion.
 | 
						|
 | 
						|
[h5 Lower Bound on Parameter p]
 | 
						|
 | 
						|
      static RealType find_lower_bound_on_p(
 | 
						|
        RealType failures,
 | 
						|
        RealType successes,
 | 
						|
        RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence.
 | 
						|
 | 
						|
Returns a *lower bound* on the success fraction:
 | 
						|
 | 
						|
[variablelist
 | 
						|
[[failures][The total number of failures before the ['r]th success.]]
 | 
						|
[[successes][The number of successes required.]]
 | 
						|
[[alpha][The largest acceptable probability that the true value of
 | 
						|
         the success fraction is [*less than] the value returned.]]
 | 
						|
]
 | 
						|
 | 
						|
For example, if you observe /k/ failures and /r/ successes from /n/ = k + r trials
 | 
						|
the best estimate for the success fraction is simply ['r/n], but if you
 | 
						|
want to be 95% sure that the true value is [*greater than] some value,
 | 
						|
['p[sub min]], then:
 | 
						|
 | 
						|
   p``[sub min]`` = negative_binomial_distribution<RealType>::find_lower_bound_on_p(
 | 
						|
                       failures, successes, 0.05);
 | 
						|
 | 
						|
[link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.]
 | 
						|
 | 
						|
This function uses the Clopper-Pearson method of computing the lower bound on the
 | 
						|
success fraction, whilst many texts refer to this method as giving an "exact"
 | 
						|
result in practice it produces an interval that guarantees ['at least] the
 | 
						|
coverage required, and may produce pessimistic estimates for some combinations
 | 
						|
of /failures/ and /successes/.  See:
 | 
						|
 | 
						|
[@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf
 | 
						|
Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions.
 | 
						|
Computational statistics and data analysis, 2005, vol. 48, no3, 605-621].
 | 
						|
 | 
						|
[h5 Upper Bound on Parameter p]
 | 
						|
 | 
						|
   static RealType find_upper_bound_on_p(
 | 
						|
      RealType trials,
 | 
						|
      RealType successes,
 | 
						|
      RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence.
 | 
						|
 | 
						|
Returns an *upper bound* on the success fraction:
 | 
						|
 | 
						|
[variablelist
 | 
						|
[[trials][The total number of trials conducted.]]
 | 
						|
[[successes][The number of successes that occurred.]]
 | 
						|
[[alpha][The largest acceptable probability that the true value of
 | 
						|
         the success fraction is [*greater than] the value returned.]]
 | 
						|
]
 | 
						|
 | 
						|
For example, if you observe /k/ successes from /n/ trials the
 | 
						|
best estimate for the success fraction is simply ['k/n], but if you
 | 
						|
want to be 95% sure that the true value is [*less than] some value,
 | 
						|
['p[sub max]], then:
 | 
						|
 | 
						|
   p``[sub max]`` = negative_binomial_distribution<RealType>::find_upper_bound_on_p(
 | 
						|
                       r, k, 0.05);
 | 
						|
 | 
						|
[link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.]
 | 
						|
 | 
						|
This function uses the Clopper-Pearson method of computing the lower bound on the
 | 
						|
success fraction, whilst many texts refer to this method as giving an "exact"
 | 
						|
result in practice it produces an interval that guarantees ['at least] the
 | 
						|
coverage required, and may produce pessimistic estimates for some combinations
 | 
						|
of /failures/ and /successes/.  See:
 | 
						|
 | 
						|
[@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf
 | 
						|
Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions.
 | 
						|
Computational statistics and data analysis, 2005, vol. 48, no3, 605-621].
 | 
						|
 | 
						|
[h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures]
 | 
						|
 | 
						|
   static RealType find_minimum_number_of_trials(
 | 
						|
      RealType k,     // number of failures.
 | 
						|
      RealType p,     // success fraction.
 | 
						|
      RealType alpha); // probability threshold (0.05 equivalent to 95%).
 | 
						|
 | 
						|
This functions estimates the number of trials required to achieve a certain
 | 
						|
probability that [*more than k failures will be observed].
 | 
						|
 | 
						|
[variablelist
 | 
						|
[[k][The target number of failures to be observed.]]
 | 
						|
[[p][The probability of ['success] for each trial.]]
 | 
						|
[[alpha][The maximum acceptable risk that only k failures or fewer will be observed.]]
 | 
						|
]
 | 
						|
 | 
						|
For example:
 | 
						|
 | 
						|
   negative_binomial_distribution<RealType>::find_minimum_number_of_trials(10, 0.5, 0.05);
 | 
						|
 | 
						|
Returns the smallest number of trials we must conduct to be 95% sure
 | 
						|
of seeing 10 failures that occur with frequency one half.
 | 
						|
 | 
						|
[link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_size_eg Worked Example.]
 | 
						|
 | 
						|
This function uses numeric inversion of the negative binomial distribution
 | 
						|
to obtain the result: another interpretation of the result, is that it finds
 | 
						|
the number of trials (success+failures) that will lead to an /alpha/ probability
 | 
						|
of observing k failures or fewer.
 | 
						|
 | 
						|
[h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less]
 | 
						|
 | 
						|
   static RealType find_maximum_number_of_trials(
 | 
						|
      RealType k,     // number of failures.
 | 
						|
      RealType p,     // success fraction.
 | 
						|
      RealType alpha); // probability threshold (0.05 equivalent to 95%).
 | 
						|
 | 
						|
This functions estimates the maximum number of trials we can conduct and achieve
 | 
						|
a certain probability that [*k failures or fewer will be observed].
 | 
						|
 | 
						|
[variablelist
 | 
						|
[[k][The maximum number of failures to be observed.]]
 | 
						|
[[p][The probability of ['success] for each trial.]]
 | 
						|
[[alpha][The maximum acceptable ['risk] that more than k failures will be observed.]]
 | 
						|
]
 | 
						|
 | 
						|
For example:
 | 
						|
 | 
						|
   negative_binomial_distribution<RealType>::find_maximum_number_of_trials(0, 1.0-1.0/1000000, 0.05);
 | 
						|
 | 
						|
Returns the largest number of trials we can conduct and still be 95% sure
 | 
						|
of seeing no failures that occur with frequency one in one million.
 | 
						|
 | 
						|
This function uses numeric inversion of the negative binomial distribution
 | 
						|
to obtain the result: another interpretation of the result, is that it finds
 | 
						|
the number of trials (success+failures) that will lead to an /alpha/ probability
 | 
						|
of observing more than k failures.
 | 
						|
 | 
						|
[h4 Non-member Accessors]
 | 
						|
 | 
						|
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
 | 
						|
that are generic to all distributions are supported: __usual_accessors.
 | 
						|
 | 
						|
However it's worth taking a moment to define what these actually mean in
 | 
						|
the context of this distribution:
 | 
						|
 | 
						|
[table Meaning of the non-member accessors.
 | 
						|
[[Function][Meaning]]
 | 
						|
[[__pdf]
 | 
						|
   [The probability of obtaining [*exactly k failures] from k+r trials
 | 
						|
   with success fraction p.  For example:
 | 
						|
 | 
						|
``pdf(negative_binomial(r, p), k)``]]
 | 
						|
[[__cdf]
 | 
						|
   [The probability of obtaining [*k failures or fewer] from k+r trials
 | 
						|
   with success fraction p and success on the last trial.  For example:
 | 
						|
 | 
						|
``cdf(negative_binomial(r, p), k)``]]
 | 
						|
[[__ccdf]
 | 
						|
   [The probability of obtaining [*more than k failures] from k+r trials
 | 
						|
   with success fraction p and success on the last trial.  For example:
 | 
						|
 | 
						|
``cdf(complement(negative_binomial(r, p), k))``]]
 | 
						|
[[__quantile]
 | 
						|
   [The [*greatest] number of failures k expected to be observed from k+r trials
 | 
						|
   with success fraction p, at probability P.  Note that the value returned
 | 
						|
   is a real-number, and not an integer.  Depending on the use case you may
 | 
						|
   want to take either the floor or ceiling of the real result.  For example:
 | 
						|
 | 
						|
``quantile(negative_binomial(r, p), P)``]]
 | 
						|
[[__quantile_c]
 | 
						|
   [The [*smallest] number of failures k expected to be observed from k+r trials
 | 
						|
   with success fraction p, at probability P.  Note that the value returned
 | 
						|
   is a real-number, and not an integer.  Depending on the use case you may
 | 
						|
   want to take either the floor or ceiling of the real result. For example:
 | 
						|
   ``quantile(complement(negative_binomial(r, p), P))``]]
 | 
						|
]
 | 
						|
 | 
						|
[h4 Accuracy]
 | 
						|
 | 
						|
This distribution is implemented using the
 | 
						|
incomplete beta functions __ibeta and __ibetac:
 | 
						|
please refer to these functions for information on accuracy.
 | 
						|
 | 
						|
[h4 Implementation]
 | 
						|
 | 
						|
In the following table, /p/ is the probability that any one trial will
 | 
						|
be successful (the success fraction), /r/ is the number of successes,
 | 
						|
/k/ is the number of failures, /p/ is the probability and /q = 1-p/.
 | 
						|
 | 
						|
[table
 | 
						|
[[Function][Implementation Notes]]
 | 
						|
[[pdf][pdf = exp(lgamma(r + k) - lgamma(r) - lgamma(k+1)) * pow(p, r) * pow((1-p), k)
 | 
						|
 | 
						|
Implementation is in terms of __ibeta_derivative:
 | 
						|
 | 
						|
(p/(r + k)) * ibeta_derivative(r, static_cast<RealType>(k+1), p)
 | 
						|
The function __ibeta_derivative is used here, since it has already
 | 
						|
been optimised for the lowest possible error - indeed this is really
 | 
						|
just a thin wrapper around part of the internals of the incomplete
 | 
						|
beta function.
 | 
						|
]]
 | 
						|
[[cdf][Using the relation:
 | 
						|
 | 
						|
cdf = I[sub p](r, k+1) = ibeta(r, k+1, p)
 | 
						|
 | 
						|
= ibeta(r, static_cast<RealType>(k+1), p)]]
 | 
						|
[[cdf complement][Using the relation:
 | 
						|
 | 
						|
1 - cdf = I[sub p](k+1, r)
 | 
						|
 | 
						|
= ibetac(r, static_cast<RealType>(k+1), p)
 | 
						|
]]
 | 
						|
[[quantile][ibeta_invb(r, p, P) - 1]]
 | 
						|
[[quantile from the complement][ibetac_invb(r, p, Q) -1)]]
 | 
						|
[[mean][ `r(1-p)/p` ]]
 | 
						|
[[variance][ `r (1-p) / p * p` ]]
 | 
						|
[[mode][`floor((r-1) * (1 - p)/p)`]]
 | 
						|
[[skewness][`(2 - p) / sqrt(r * (1 - p))`]]
 | 
						|
[[kurtosis][`6 / r + (p * p) / r * (1 - p )`]]
 | 
						|
[[kurtosis excess][`6 / r + (p * p) / r * (1 - p ) -3`]]
 | 
						|
[[parameter estimation member functions][]]
 | 
						|
[[`find_lower_bound_on_p`][ibeta_inv(successes, failures + 1, alpha)]]
 | 
						|
[[`find_upper_bound_on_p`][ibetac_inv(successes, failures, alpha) plus see comments in code.]]
 | 
						|
[[`find_minimum_number_of_trials`][ibeta_inva(k + 1, p, alpha)]]
 | 
						|
[[`find_maximum_number_of_trials`][ibetac_inva(k + 1, p, alpha)]]
 | 
						|
]
 | 
						|
 | 
						|
Implementation notes:
 | 
						|
 | 
						|
* The real concept type (that deliberately lacks the Lanczos approximation),
 | 
						|
was found to take several minutes to evaluate some extreme test values,
 | 
						|
so the test has been disabled for this type.
 | 
						|
 | 
						|
* Much greater speed, and perhaps greater accuracy,
 | 
						|
might be achieved for extreme values by using a normal approximation.
 | 
						|
This is NOT been tested or implemented.
 | 
						|
 | 
						|
[endsect][/section:negative_binomial_dist Negative Binomial]
 | 
						|
 | 
						|
[/ negative_binomial.qbk
 | 
						|
  Copyright 2006 John Maddock and Paul A. Bristow.
 | 
						|
  Distributed under the Boost Software License, Version 1.0.
 | 
						|
  (See accompanying file LICENSE_1_0.txt or copy at
 | 
						|
  http://www.boost.org/LICENSE_1_0.txt).
 | 
						|
]
 | 
						|
 |