mirror of
				https://github.com/saitohirga/WSJT-X.git
				synced 2025-10-29 20:10:28 -04:00 
			
		
		
		
	
		
			
	
	
		
			374 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			374 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|  | [section:negative_binomial_dist Negative Binomial Distribution] | ||
|  | 
 | ||
|  | ``#include <boost/math/distributions/negative_binomial.hpp>`` | ||
|  | 
 | ||
|  |    namespace boost{ namespace math{ | ||
|  | 
 | ||
|  |    template <class RealType = double, | ||
|  |              class ``__Policy``   = ``__policy_class`` > | ||
|  |    class negative_binomial_distribution; | ||
|  | 
 | ||
|  |    typedef negative_binomial_distribution<> negative_binomial; | ||
|  | 
 | ||
|  |    template <class RealType, class ``__Policy``> | ||
|  |    class negative_binomial_distribution | ||
|  |    { | ||
|  |    public: | ||
|  |       typedef RealType value_type; | ||
|  |       typedef Policy   policy_type; | ||
|  |       // Constructor from successes and success_fraction: | ||
|  |       negative_binomial_distribution(RealType r, RealType p); | ||
|  | 
 | ||
|  |       // Parameter accessors: | ||
|  |       RealType success_fraction() const; | ||
|  |       RealType successes() const; | ||
|  | 
 | ||
|  |       // Bounds on success fraction: | ||
|  |       static RealType find_lower_bound_on_p( | ||
|  |          RealType trials, | ||
|  |          RealType successes, | ||
|  |          RealType probability); // alpha | ||
|  |       static RealType find_upper_bound_on_p( | ||
|  |          RealType trials, | ||
|  |          RealType successes, | ||
|  |          RealType probability); // alpha | ||
|  | 
 | ||
|  |       // Estimate min/max number of trials: | ||
|  |       static RealType find_minimum_number_of_trials( | ||
|  |          RealType k,     // Number of failures. | ||
|  |          RealType p,     // Success fraction. | ||
|  |          RealType probability); // Probability threshold alpha. | ||
|  |       static RealType find_maximum_number_of_trials( | ||
|  |          RealType k,     // Number of failures. | ||
|  |          RealType p,     // Success fraction. | ||
|  |          RealType probability); // Probability threshold alpha. | ||
|  |    }; | ||
|  | 
 | ||
|  |    }} // namespaces | ||
|  | 
 | ||
|  | The class type `negative_binomial_distribution` represents a | ||
|  | [@http://en.wikipedia.org/wiki/Negative_binomial_distribution negative_binomial distribution]: | ||
|  | it is used when there are exactly two mutually exclusive outcomes of a | ||
|  | [@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trial]: | ||
|  | these outcomes are labelled "success" and "failure". | ||
|  | 
 | ||
|  | For k + r Bernoulli trials each with success fraction p, the | ||
|  | negative_binomial distribution gives the probability of observing | ||
|  | k failures and r successes with success on the last trial. | ||
|  | The negative_binomial distribution | ||
|  | assumes that success_fraction p is fixed for all (k + r) trials. | ||
|  | 
 | ||
|  | [note The random variable for the negative binomial distribution is the number of trials, | ||
|  | (the number of successes is a fixed property of the distribution) | ||
|  | whereas for the binomial, | ||
|  | the random variable is the number of successes, for a fixed number of trials.] | ||
|  | 
 | ||
|  | It has the PDF: | ||
|  | 
 | ||
|  | [equation neg_binomial_ref] | ||
|  | 
 | ||
|  | The following graph illustrate how the PDF varies as the success fraction | ||
|  | /p/ changes: | ||
|  | 
 | ||
|  | [graph negative_binomial_pdf_1] | ||
|  | 
 | ||
|  | Alternatively, this graph shows how the shape of the PDF varies as | ||
|  | the number of successes changes: | ||
|  | 
 | ||
|  | [graph negative_binomial_pdf_2] | ||
|  | 
 | ||
|  | [h4 Related Distributions] | ||
|  | 
 | ||
|  | The name negative binomial distribution is reserved by some to the | ||
|  | case where the successes parameter r is an integer. | ||
|  | This integer version is also called the | ||
|  | [@http://mathworld.wolfram.com/PascalDistribution.html Pascal distribution]. | ||
|  | 
 | ||
|  | This implementation uses real numbers for the computation throughout | ||
|  | (because it uses the *real-valued* incomplete beta function family of functions). | ||
|  | This real-valued version is also called the Polya Distribution. | ||
|  | 
 | ||
|  | The Poisson distribution is a generalization of the Pascal distribution, | ||
|  | where the success parameter r is an integer: to obtain the Pascal | ||
|  | distribution you must ensure that an integer value is provided for r, | ||
|  | and take integer values (floor or ceiling) from functions that return | ||
|  | a number of successes. | ||
|  | 
 | ||
|  | For large values of r (successes), the negative binomial distribution | ||
|  | converges to the Poisson distribution. | ||
|  | 
 | ||
|  | The geometric distribution is a special case | ||
|  | where the successes parameter r = 1, | ||
|  | so only a first and only success is required. | ||
|  | geometric(p) = negative_binomial(1, p). | ||
|  | 
 | ||
|  | The Poisson distribution is a special case for large successes | ||
|  | 
 | ||
|  | poisson([lambda]) = lim [sub r [rarr] [infin]] [space] negative_binomial(r, r / ([lambda] + r))) | ||
|  | 
 | ||
|  | [discrete_quantile_warning Negative Binomial] | ||
|  | 
 | ||
|  | [h4 Member Functions] | ||
|  | 
 | ||
|  | [h5 Construct] | ||
|  | 
 | ||
|  |    negative_binomial_distribution(RealType r, RealType p); | ||
|  | 
 | ||
|  | Constructor: /r/ is the total number of successes, /p/ is the | ||
|  | probability of success of a single trial. | ||
|  | 
 | ||
|  | Requires: `r > 0` and `0 <= p <= 1`. | ||
|  | 
 | ||
|  | [h5 Accessors] | ||
|  | 
 | ||
|  |    RealType success_fraction() const; // successes / trials (0 <= p <= 1) | ||
|  | 
 | ||
|  | Returns the parameter /p/ from which this distribution was constructed. | ||
|  | 
 | ||
|  |    RealType successes() const; // required successes (r > 0) | ||
|  | 
 | ||
|  | Returns the parameter /r/ from which this distribution was constructed. | ||
|  | 
 | ||
|  | The best method of calculation for the following functions is disputed: | ||
|  | see __binomial_distrib for more discussion. | ||
|  | 
 | ||
|  | [h5 Lower Bound on Parameter p] | ||
|  | 
 | ||
|  |       static RealType find_lower_bound_on_p( | ||
|  |         RealType failures, | ||
|  |         RealType successes, | ||
|  |         RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. | ||
|  | 
 | ||
|  | Returns a *lower bound* on the success fraction: | ||
|  | 
 | ||
|  | [variablelist | ||
|  | [[failures][The total number of failures before the ['r]th success.]] | ||
|  | [[successes][The number of successes required.]] | ||
|  | [[alpha][The largest acceptable probability that the true value of | ||
|  |          the success fraction is [*less than] the value returned.]] | ||
|  | ] | ||
|  | 
 | ||
|  | For example, if you observe /k/ failures and /r/ successes from /n/ = k + r trials | ||
|  | the best estimate for the success fraction is simply ['r/n], but if you | ||
|  | want to be 95% sure that the true value is [*greater than] some value, | ||
|  | ['p[sub min]], then: | ||
|  | 
 | ||
|  |    p``[sub min]`` = negative_binomial_distribution<RealType>::find_lower_bound_on_p( | ||
|  |                        failures, successes, 0.05); | ||
|  | 
 | ||
|  | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.] | ||
|  | 
 | ||
|  | This function uses the Clopper-Pearson method of computing the lower bound on the | ||
|  | success fraction, whilst many texts refer to this method as giving an "exact" | ||
|  | result in practice it produces an interval that guarantees ['at least] the | ||
|  | coverage required, and may produce pessimistic estimates for some combinations | ||
|  | of /failures/ and /successes/.  See: | ||
|  | 
 | ||
|  | [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf | ||
|  | Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions. | ||
|  | Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. | ||
|  | 
 | ||
|  | [h5 Upper Bound on Parameter p] | ||
|  | 
 | ||
|  |    static RealType find_upper_bound_on_p( | ||
|  |       RealType trials, | ||
|  |       RealType successes, | ||
|  |       RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. | ||
|  | 
 | ||
|  | Returns an *upper bound* on the success fraction: | ||
|  | 
 | ||
|  | [variablelist | ||
|  | [[trials][The total number of trials conducted.]] | ||
|  | [[successes][The number of successes that occurred.]] | ||
|  | [[alpha][The largest acceptable probability that the true value of | ||
|  |          the success fraction is [*greater than] the value returned.]] | ||
|  | ] | ||
|  | 
 | ||
|  | For example, if you observe /k/ successes from /n/ trials the | ||
|  | best estimate for the success fraction is simply ['k/n], but if you | ||
|  | want to be 95% sure that the true value is [*less than] some value, | ||
|  | ['p[sub max]], then: | ||
|  | 
 | ||
|  |    p``[sub max]`` = negative_binomial_distribution<RealType>::find_upper_bound_on_p( | ||
|  |                        r, k, 0.05); | ||
|  | 
 | ||
|  | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.] | ||
|  | 
 | ||
|  | This function uses the Clopper-Pearson method of computing the lower bound on the | ||
|  | success fraction, whilst many texts refer to this method as giving an "exact" | ||
|  | result in practice it produces an interval that guarantees ['at least] the | ||
|  | coverage required, and may produce pessimistic estimates for some combinations | ||
|  | of /failures/ and /successes/.  See: | ||
|  | 
 | ||
|  | [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf | ||
|  | Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions. | ||
|  | Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. | ||
|  | 
 | ||
|  | [h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures] | ||
|  | 
 | ||
|  |    static RealType find_minimum_number_of_trials( | ||
|  |       RealType k,     // number of failures. | ||
|  |       RealType p,     // success fraction. | ||
|  |       RealType alpha); // probability threshold (0.05 equivalent to 95%). | ||
|  | 
 | ||
|  | This functions estimates the number of trials required to achieve a certain | ||
|  | probability that [*more than k failures will be observed]. | ||
|  | 
 | ||
|  | [variablelist | ||
|  | [[k][The target number of failures to be observed.]] | ||
|  | [[p][The probability of ['success] for each trial.]] | ||
|  | [[alpha][The maximum acceptable risk that only k failures or fewer will be observed.]] | ||
|  | ] | ||
|  | 
 | ||
|  | For example: | ||
|  | 
 | ||
|  |    negative_binomial_distribution<RealType>::find_minimum_number_of_trials(10, 0.5, 0.05); | ||
|  | 
 | ||
|  | Returns the smallest number of trials we must conduct to be 95% sure | ||
|  | of seeing 10 failures that occur with frequency one half. | ||
|  | 
 | ||
|  | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_size_eg Worked Example.] | ||
|  | 
 | ||
|  | This function uses numeric inversion of the negative binomial distribution | ||
|  | to obtain the result: another interpretation of the result, is that it finds | ||
|  | the number of trials (success+failures) that will lead to an /alpha/ probability | ||
|  | of observing k failures or fewer. | ||
|  | 
 | ||
|  | [h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less] | ||
|  | 
 | ||
|  |    static RealType find_maximum_number_of_trials( | ||
|  |       RealType k,     // number of failures. | ||
|  |       RealType p,     // success fraction. | ||
|  |       RealType alpha); // probability threshold (0.05 equivalent to 95%). | ||
|  | 
 | ||
|  | This functions estimates the maximum number of trials we can conduct and achieve | ||
|  | a certain probability that [*k failures or fewer will be observed]. | ||
|  | 
 | ||
|  | [variablelist | ||
|  | [[k][The maximum number of failures to be observed.]] | ||
|  | [[p][The probability of ['success] for each trial.]] | ||
|  | [[alpha][The maximum acceptable ['risk] that more than k failures will be observed.]] | ||
|  | ] | ||
|  | 
 | ||
|  | For example: | ||
|  | 
 | ||
|  |    negative_binomial_distribution<RealType>::find_maximum_number_of_trials(0, 1.0-1.0/1000000, 0.05); | ||
|  | 
 | ||
|  | Returns the largest number of trials we can conduct and still be 95% sure | ||
|  | of seeing no failures that occur with frequency one in one million. | ||
|  | 
 | ||
|  | This function uses numeric inversion of the negative binomial distribution | ||
|  | to obtain the result: another interpretation of the result, is that it finds | ||
|  | the number of trials (success+failures) that will lead to an /alpha/ probability | ||
|  | of observing more than k failures. | ||
|  | 
 | ||
|  | [h4 Non-member Accessors] | ||
|  | 
 | ||
|  | All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] | ||
|  | that are generic to all distributions are supported: __usual_accessors. | ||
|  | 
 | ||
|  | However it's worth taking a moment to define what these actually mean in | ||
|  | the context of this distribution: | ||
|  | 
 | ||
|  | [table Meaning of the non-member accessors. | ||
|  | [[Function][Meaning]] | ||
|  | [[__pdf] | ||
|  |    [The probability of obtaining [*exactly k failures] from k+r trials | ||
|  |    with success fraction p.  For example: | ||
|  | 
 | ||
|  | ``pdf(negative_binomial(r, p), k)``]] | ||
|  | [[__cdf] | ||
|  |    [The probability of obtaining [*k failures or fewer] from k+r trials | ||
|  |    with success fraction p and success on the last trial.  For example: | ||
|  | 
 | ||
|  | ``cdf(negative_binomial(r, p), k)``]] | ||
|  | [[__ccdf] | ||
|  |    [The probability of obtaining [*more than k failures] from k+r trials | ||
|  |    with success fraction p and success on the last trial.  For example: | ||
|  | 
 | ||
|  | ``cdf(complement(negative_binomial(r, p), k))``]] | ||
|  | [[__quantile] | ||
|  |    [The [*greatest] number of failures k expected to be observed from k+r trials | ||
|  |    with success fraction p, at probability P.  Note that the value returned | ||
|  |    is a real-number, and not an integer.  Depending on the use case you may | ||
|  |    want to take either the floor or ceiling of the real result.  For example: | ||
|  | 
 | ||
|  | ``quantile(negative_binomial(r, p), P)``]] | ||
|  | [[__quantile_c] | ||
|  |    [The [*smallest] number of failures k expected to be observed from k+r trials | ||
|  |    with success fraction p, at probability P.  Note that the value returned | ||
|  |    is a real-number, and not an integer.  Depending on the use case you may | ||
|  |    want to take either the floor or ceiling of the real result. For example: | ||
|  |    ``quantile(complement(negative_binomial(r, p), P))``]] | ||
|  | ] | ||
|  | 
 | ||
|  | [h4 Accuracy] | ||
|  | 
 | ||
|  | This distribution is implemented using the | ||
|  | incomplete beta functions __ibeta and __ibetac: | ||
|  | please refer to these functions for information on accuracy. | ||
|  | 
 | ||
|  | [h4 Implementation] | ||
|  | 
 | ||
|  | In the following table, /p/ is the probability that any one trial will | ||
|  | be successful (the success fraction), /r/ is the number of successes, | ||
|  | /k/ is the number of failures, /p/ is the probability and /q = 1-p/. | ||
|  | 
 | ||
|  | [table | ||
|  | [[Function][Implementation Notes]] | ||
|  | [[pdf][pdf = exp(lgamma(r + k) - lgamma(r) - lgamma(k+1)) * pow(p, r) * pow((1-p), k) | ||
|  | 
 | ||
|  | Implementation is in terms of __ibeta_derivative: | ||
|  | 
 | ||
|  | (p/(r + k)) * ibeta_derivative(r, static_cast<RealType>(k+1), p) | ||
|  | The function __ibeta_derivative is used here, since it has already | ||
|  | been optimised for the lowest possible error - indeed this is really | ||
|  | just a thin wrapper around part of the internals of the incomplete | ||
|  | beta function. | ||
|  | ]] | ||
|  | [[cdf][Using the relation: | ||
|  | 
 | ||
|  | cdf = I[sub p](r, k+1) = ibeta(r, k+1, p) | ||
|  | 
 | ||
|  | = ibeta(r, static_cast<RealType>(k+1), p)]] | ||
|  | [[cdf complement][Using the relation: | ||
|  | 
 | ||
|  | 1 - cdf = I[sub p](k+1, r) | ||
|  | 
 | ||
|  | = ibetac(r, static_cast<RealType>(k+1), p) | ||
|  | ]] | ||
|  | [[quantile][ibeta_invb(r, p, P) - 1]] | ||
|  | [[quantile from the complement][ibetac_invb(r, p, Q) -1)]] | ||
|  | [[mean][ `r(1-p)/p` ]] | ||
|  | [[variance][ `r (1-p) / p * p` ]] | ||
|  | [[mode][`floor((r-1) * (1 - p)/p)`]] | ||
|  | [[skewness][`(2 - p) / sqrt(r * (1 - p))`]] | ||
|  | [[kurtosis][`6 / r + (p * p) / r * (1 - p )`]] | ||
|  | [[kurtosis excess][`6 / r + (p * p) / r * (1 - p ) -3`]] | ||
|  | [[parameter estimation member functions][]] | ||
|  | [[`find_lower_bound_on_p`][ibeta_inv(successes, failures + 1, alpha)]] | ||
|  | [[`find_upper_bound_on_p`][ibetac_inv(successes, failures, alpha) plus see comments in code.]] | ||
|  | [[`find_minimum_number_of_trials`][ibeta_inva(k + 1, p, alpha)]] | ||
|  | [[`find_maximum_number_of_trials`][ibetac_inva(k + 1, p, alpha)]] | ||
|  | ] | ||
|  | 
 | ||
|  | Implementation notes: | ||
|  | 
 | ||
|  | * The real concept type (that deliberately lacks the Lanczos approximation), | ||
|  | was found to take several minutes to evaluate some extreme test values, | ||
|  | so the test has been disabled for this type. | ||
|  | 
 | ||
|  | * Much greater speed, and perhaps greater accuracy, | ||
|  | might be achieved for extreme values by using a normal approximation. | ||
|  | This is NOT been tested or implemented. | ||
|  | 
 | ||
|  | [endsect][/section:negative_binomial_dist Negative Binomial] | ||
|  | 
 | ||
|  | [/ negative_binomial.qbk | ||
|  |   Copyright 2006 John Maddock and Paul A. Bristow. | ||
|  |   Distributed under the Boost Software License, Version 1.0. | ||
|  |   (See accompanying file LICENSE_1_0.txt or copy at | ||
|  |   http://www.boost.org/LICENSE_1_0.txt). | ||
|  | ] | ||
|  | 
 |