# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Frequentist confidence interval estimation.
(Frequentist) confidence interval interpretation, with 95% coverage: **If we
are to take several independent random samples from the population and
construct confidence intervals from each of the sample data, then 95 out of 100
confidence intervals will contain the true mean (true proportion, in this
context of proportion)**.
See a discussion in `Five Confidence Intervals for Proportions That You
Should Know About <ci-evaluation_>`_.
.. include:: ../links.rst
"""
import typing
import numpy
import numpy.typing
import scipy.stats
from ..utils import as_int_arrays
def _clopper_pearson_ndarray(
successes: numpy.typing.NDArray[numpy.integer],
failures: numpy.typing.NDArray[numpy.integer],
coverage: float,
) -> tuple[
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
]:
""":py:func:`clopper_pearson`, for multiple systems.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
right = (1.0 - coverage) / 2 # half-width in each side
lower = scipy.stats.beta.ppf(right, successes, failures + 1)
upper = scipy.stats.beta.ppf(1 - right, successes + 1, failures)
lower = numpy.nan_to_num(lower, nan=0.0)
upper = numpy.nan_to_num(upper, nan=1.0)
return successes / (successes + failures), lower, upper
[docs]
def clopper_pearson_array(
successes: typing.Iterable[int],
failures: typing.Iterable[int],
coverage: float,
) -> tuple[
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
]:
""":py:func:`clopper_pearson`, for multiple systems.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
successes_array, failures_array = as_int_arrays((successes, failures))
return _clopper_pearson_ndarray(successes_array, failures_array, coverage)
[docs]
def clopper_pearson(
successes: int, failures: int, coverage: float = 0.95
) -> tuple[float, float, float]:
"""Calculate the "exact" confidence interval for proportion estimates.
The Clopper-Pearson interval method is used for estimating the confidence
intervals. This implementation is based on [CLOPPER-1934]_. This
technique is **very** conservative - in most of the cases, coverage is
greater than the required value, which may imply in too large confidence
intervals.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
retval = clopper_pearson_array([successes], [failures], coverage)
return (retval[0].item(), retval[1].item(), retval[2].item())
def _agresti_coull_ndarray(
successes: numpy.typing.NDArray[numpy.integer],
failures: numpy.typing.NDArray[numpy.integer],
coverage: float,
) -> tuple[
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
]:
""":py:func:`agresti_coull`, for multiple systems.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the
coverage you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
right = (1.0 - coverage) / 2 # half-width in each side
crit = scipy.stats.norm.isf(right)
kl_c = (successes + failures) + crit**2
q_c = (successes + crit**2 / 2.0) / kl_c
std_c = numpy.sqrt(q_c * (1.0 - q_c) / kl_c)
dist = crit * std_c
lower = q_c - dist
upper = q_c + dist
lower = numpy.nan_to_num(lower, nan=0.0)
upper = numpy.nan_to_num(upper, nan=1.0)
return successes / (successes + failures), lower, upper
[docs]
def agresti_coull_array(
successes: typing.Iterable[int],
failures: typing.Iterable[int],
coverage: float,
) -> tuple[
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
]:
""":py:func:`agresti_coull`, for multiple systems.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the
coverage you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
successes_array, failures_array = as_int_arrays((successes, failures))
return _agresti_coull_ndarray(successes_array, failures_array, coverage)
[docs]
def agresti_coull(
successes: int, failures: int, coverage: float = 0.95
) -> tuple[float, float, float]:
"""Calculate the confidence interval for proportion estimates.
The Agresti-Coull interval method is used for estimating the confidence
intervals. This implementation is based on [AGRESTI-1998]_. This
technique is conservative - in most of the cases, coverage is greater
than the required value, which may imply a larger confidence interval that
required.
This function is considered a good choice for the frequentist approach, if
you cannot use :py:func:`clopper_pearson`.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the
coverage you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
retval = agresti_coull_array([successes], [failures], coverage)
return (retval[0].item(), retval[1].item(), retval[2].item())
def _wilson_ndarray(
successes: numpy.typing.NDArray[numpy.integer],
failures: numpy.typing.NDArray[numpy.integer],
coverage: float,
) -> tuple[
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
]:
""":py:func:`wilson`, for multiple systems.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the
coverage you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
right = (1.0 - coverage) / 2 # half-width in each side
n = successes + failures
p = successes / n
crit = scipy.stats.norm.isf(right)
crit2 = crit**2
denom = 1 + (crit2 / n)
center = (p + crit2 / (2 * n)) / denom
dist = crit * numpy.sqrt(p * (1.0 - p) / n + crit2 / (4.0 * n**2))
dist = dist / denom
lower = center - dist
upper = center + dist
lower = numpy.nan_to_num(lower, nan=0.0)
upper = numpy.nan_to_num(upper, nan=1.0)
return successes / (successes + failures), lower, upper
[docs]
def wilson_array(
successes: typing.Iterable[int],
failures: typing.Iterable[int],
coverage: float,
) -> tuple[
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
numpy.typing.NDArray[numpy.double],
]:
""":py:func:`wilson`, for multiple systems.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the
coverage you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
successes_array, failures_array = as_int_arrays((successes, failures))
return _wilson_ndarray(successes_array, failures_array, coverage)
[docs]
def wilson(
successes: int, failures: int, coverage: float = 0.95
) -> tuple[float, float, float]:
"""Calculate the confidence interval for proportion estimates.
The Wilson interval method is used for estimating the confidence intervals.
This implementation is based on [WILSON-1927]_. This implementation does
**not** contain the continuity correction. It is as conservative in the
extremes of the domain as the bayesian approach and can be a good default,
if :py:func:`clopper_pearson` cannot be used.
This function is considered the best "default" for the frequentist
approach as it is not too conservative and assumes a resonable value
through out the range.
Parameters
----------
successes
Number of successes observed on the experiment.
failures
Number of failures observed on the experiment.
coverage
A floating-point number between 0 and 1.0 indicating the
coverage you're expecting. A value of 0.95 will ensure 95% coverage.
Returns
-------
The estimated ratio between successes, and total trials (successes plus
failures), lower and upper bounds of the confidence interval, in this
order.
Raises
------
TypeError
If the dimensions of ``successes`` and ``failures`` do not match, or in
case the input types are unsupported.
"""
retval = wilson_array([successes], [failures], coverage)
return (retval[0].item(), retval[1].item(), retval[2].item())