Source code for credible.curves

# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Curve plotting support.

Code in this module expresses classic performance curves for system performance
evaluation as sets of coordinates.  Use the module :py:mod:`.plot` to make
graphical representations.

.. include:: ../links.rst
"""

import enum
import typing

import numpy
import numpy.linalg
import numpy.typing
import sklearn.metrics

from .utils import CIArrayFunctor, CIFunctor


[docs] class AxisType(enum.Enum): r"""Supported types for metrics that have binomial distributions. This enumeration contains a list of metrics (rates) that have binomial distributions. For each available entry, we define the successes (``k``) and failures (``l``) such that the metric can be calculated as such: .. math:: metric = \frac{k}{k+l} Keys with the same (integer) value represent synonyms. Definitions are taken from https://en.wikipedia.org/wiki/Confusion_matrix. Parameters ---------- key One of the integer keys for supported measures: * True positive rate, recall, sensitivity: 1 * True negative rate, specificity, selectivity: 2 * False negative rate: 3 * False positive rate: 4 * Precision, positive preditive value: 5 * Negative predictive value: 6 fullname Full name of the measure. abbreviation The abbreviation for the measure (three-letter, lower-case). """ TPR = (1, "true positive rate", "tpr") REC = (1, "recall", "rec") SEN = (1, "sensitivity", "sen") TNR = (2, "true negative rate", "tnr") SPEC = (2, "specificity", "spec") SEL = (2, "selectivity", "sel") FNR = (3, "false negative rate", "fnr") FPR = (4, "false positive rate", "fpr") PREC = (5, "precision", "prec") PPV = (5, "positive predictive value", "ppv") NPV = (6, "negative predictive value", "npv") def __init__(self, key: int, fullname: str, abbreviation: str): self.key = key self.fullname = fullname self.abbreviation = abbreviation
[docs] def make_cm_functor( self, ci_functor: CIFunctor ) -> typing.Callable[ [numpy.typing.NDArray[numpy.int_]], tuple[float, float, float] ]: """Return callable to treat binary confusion matrices and produce curve and confidence intervals. This method will take a confidence interval functor and will wrap it over a complete confusion-matrix functor that produces the metric estimates, lower and upper confidence intervals, in this order. Parameters ---------- ci_functor Functor to evaluate the rate, lower and upper confidence intervals from (binomial) successes and failures. Returns ------- A functor that can operate directly from the confusion matrix outputs (instead of success and failures), with the same return values as the input functor. """ successes = failures = -1 match self.key: # Notice scikit-learn order: # tn, fp, fn, tp = confusion_matrix(...).ravel() # From https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html case 1: # tpr successes = 3 # tp failures = 2 # fn case 2: # tnr successes = 0 # tn failures = 1 # fp case 3: # fnr successes = 2 # fn failures = 3 # tp case 4: # fpr successes = 1 # fp failures = 0 # tn case 5: # precision successes = 3 # tp failures = 1 # fp case 6: # npv successes = 0 # tn failures = 2 # fn return lambda x: ci_functor(x[successes], x[failures])
[docs] def curve_ci( y_true: typing.Iterable[int], y_score: typing.Iterable[float], axes: tuple[AxisType, AxisType], ci_functor: CIFunctor, skl_functor_name: typing.Literal["roc", "det", "pr"], ) -> tuple[ numpy.typing.NDArray[numpy.double], # axes[0] rate numpy.typing.NDArray[numpy.double], # axes[1] rate numpy.typing.NDArray[numpy.double], # thresholds numpy.typing.NDArray[numpy.double], # axes[0] lower ci numpy.typing.NDArray[numpy.double], # axes[1] lower ci numpy.typing.NDArray[numpy.double], # axes[0] upper ci numpy.typing.NDArray[numpy.double], # axes[1] upper ci ]: """Calculate points and confidence intervals of an arbitrary performance curve. This function can calculate the rates and confidence intervals of an user-configurable ROC-style curve. Parameters ---------- y_true Ground truth (correct) labels. y_score Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by “decision_function” on some classifiers). axes Which axes to calculate the curve for. Note not all combinations make sense (no checks are performed). You should avoid computing a curve, for example, of TPR against FNR as these rates are complementary to 1.0. ci_functor A callable to be used to calculate the estimate, lower, and upper bounds for the measures of interest on each of the axes. This callable will receive (binomial, integer) successes and failures, and produce the rate, lower and upper bounds respecctively. skl_functor_name The name of a callable from scikit-learn to calculate basic thresholds for the target curve. Use one of: * ``roc``: :py:func:`sklearn.metrics.roc_curve` * ``det``: :py:func:`sklearn.metrics.det_curve` * ``pr``: :py:func:`sklearn.metrics.precision_recall_curve` Returns ------- tuple Seven 1-D floating point arrays corresponding to: * The selected metric for the first axis * The selected metric for the second axis * The thresholds used to evaluated the selected metrics, in decreasing order. * The lower confidence interval for the selected metric for the first axis * The lower confidence interval selected metric for the second axis * The upper confidence interval for the selected metric for the first axis * The upper confidence interval selected metric for the second axis """ # finally, we do the computing of the curve y_true_array = numpy.asarray(y_true, dtype=numpy.int_) y_score_array = numpy.asarray(y_score, dtype=numpy.double) skl_functor = sklearn.metrics.roc_curve match skl_functor_name: case "roc": skl_functor = sklearn.metrics.roc_curve case "det": skl_functor = sklearn.metrics.det_curve case "pr": skl_functor = sklearn.metrics.precision_recall_curve _, _, thresholds = skl_functor(y_true_array, y_score_array) cm_per_threshold = [ sklearn.metrics.confusion_matrix(y_true_array, (y_score_array >= t)) .ravel() .astype(numpy.int_) for t in thresholds ] # convert confusion-matrices per threshold into rate, lower, and upper # values per threshold op0 = axes[0].make_cm_functor(ci_functor) ax0 = numpy.asarray([op0(k) for k in cm_per_threshold], dtype=numpy.double) op1 = axes[1].make_cm_functor(ci_functor) ax1 = numpy.asarray([op1(k) for k in cm_per_threshold], dtype=numpy.double) # ensure we have no lower that is higher than the values and vice-versa ax0[:, 1] = numpy.min((ax0[:, 0], ax0[:, 1]), axis=0) ax0[:, 2] = numpy.max((ax0[:, 0], ax0[:, 2]), axis=0) ax1[:, 1] = numpy.min((ax1[:, 0], ax1[:, 1]), axis=0) ax1[:, 2] = numpy.max((ax1[:, 0], ax1[:, 2]), axis=0) return ( ax0[:, 0], ax1[:, 0], thresholds, ax0[:, 1], ax1[:, 1], ax0[:, 2], ax1[:, 2], )
[docs] def curve_ci_hull( curve: tuple[ typing.Iterable[float], # axes[0] rate typing.Iterable[float], # axes[1] rate typing.Iterable[float], # thresholds (ignored) typing.Iterable[float], # axes[0] lower ci typing.Iterable[float], # axes[1] lower ci typing.Iterable[float], # axes[0] upper ci typing.Iterable[float], # axes[1] upper ci ], extrapolate_from_origin: bool = True, ) -> tuple[ tuple[ numpy.typing.NDArray[numpy.double], numpy.typing.NDArray[numpy.double] ], tuple[ numpy.typing.NDArray[numpy.double], numpy.typing.NDArray[numpy.double] ], ]: """Calculate lower and upper confidence intervals of a curve. This function calculates the hulls for 2 curves that are formed from points defining the lower and upper bounds of the curve's credible/confidence intervals for each measured threshold. It returns the curve (no changes), as well as the lower and upper bounds of the (central) curve. To calculate the upper and lower curves, we do not consider the extremities of the upper and lower bounds, as those points would translate to pessimistic estimations of the true confidence interval bounds. Instead, we simply find the intersection of a straight line from the origin (0,0) and the ellipse 90-degree sector inscribed in the appropriate quarter of a rectangle centered at the ROC point, and its lower and upper bound CI estimates on both directions (horizontal and vertical). If ``extrapolate_from_origin`` is set to ``False``, then intersections are created from ``(x, y) = (1, 0)``. Parameters ---------- curve Seven 1-D floating point arrays corresponding to: * The selected metric for the first axis * The selected metric for the second axis * The thresholds (in decreasing order, as produced by scikit-learn) * The lower confidence interval for the selected metric for the first axis * The lower confidence interval selected metric for the second axis * The upper confidence interval for the selected metric for the first axis * The upper confidence interval selected metric for the second axis extrapolate_from_origin To calculate the upper hull, we consider two distinct cases: if ``extrapolate_from_origin`` is ``True`` (default), then we consider the curve starts (or finishes) at coordinate ``(x,y) = (1,0)`` and finishes (or starts) at ``(x,y) = (0,1)``. This is the case if the user is plotting TPR against TNR or FPR against FNR. If ``extrapolate_from_origin`` is ``False``, then we consider the curve starts (or finishes) at ``(x,y) = (0,0)``, and finishes (or starts) at ``(x,y) = (1,1)``. If ``extrapolate_from_origin`` is ``True`` (default), each point of the curve to extrapolates to the right and upper points defined by the upper bounds of the credible/confidence intervals, and to the left and lower points defined by the lower bounds of the intervals. If ``extrapolate_from_origin`` is ``False``, each point of the curve to extrapolates to the left and upper points defined by the upper bounds of the credible/confidence intervals, and to the right and lower points defined by the lower bounds of the intervals. Returns ------- tuple Two curves as follows: * lower: Two 1D arrays of floats that expresses the lower-bound of curve, for the first and second coordinates respectively. * upper: Two 1D arrays of floats that expresses the upper-bound of curve, for the first and second coordinates respectively. """ def _ellipse_intersect( a: numpy.typing.NDArray[numpy.double], b: numpy.typing.NDArray[numpy.double], i: numpy.typing.NDArray[numpy.double], j: numpy.typing.NDArray[numpy.double], quadrant: typing.Literal["tl", "tr", "bl", "br"], ) -> tuple[ numpy.typing.NDArray[numpy.double], numpy.typing.NDArray[numpy.double] ]: r"""Calculate the intersection of a line, passing at the center of the ellipse, and the ellipse itself. See: https://mathworld.wolfram.com/Ellipse-LineIntersection.html .. math:: x &= \frac{1}{\sqrt{\frac{1}{a^2}+\frac{j^2}{b^2 i^2}}} \\ y &= x \frac{j}{i} Parameters ---------- a Width of the ellipse. b Height of the ellipse. i Width of the reference vector (to compute angle). j Height of the reference vector (to compute angle). quadrant The quadrant applicable to the vector direction: either "tl" (top-left), "tr" (top-right), "bl" (bottom-left) or "br" (bottom-right). Returns ------- A tuple containing the x and y coordinates of the curve. """ # radius calculation, without direction eps = 1e-8 num = (a**2) * (b**2) * (i**2) den = ((b**2) * (i**2)) + ((a**2) * (j**2)) x = numpy.sqrt( numpy.divide(num, den, out=numpy.zeros_like(a), where=(den > eps)) ) y = numpy.divide(x * j, i, out=numpy.zeros_like(a), where=(i > eps)) # when close to the x-axis, just re-use the ellipse width x = numpy.where(j < eps, a, x) # when close to the (1,0)-(1,1)-axis, just re-use the ellipse height y = numpy.where(i < eps, b, y) match quadrant: case "tr": # add on both directions return (i + x, j + y) case "tl": # add on y direction, subtract on x # n.b.: we use (1-i) to recover the original spatial # coordinates (see below on the call to this function) return ((1 - i) - x, j + y) case "bl": # subtract on both directions return (i - x, j - y) case "br": # subtract on y direction, add on x # n.b.: we use (1-i) to recover the original spatial # coordinates (see below on the call to this function) return ((1 - i) + x, j - y) (x, y, _, x_low, y_low, x_high, y_high) = tuple( numpy.asarray(k, dtype=numpy.double) for k in curve ) if extrapolate_from_origin: # sectors are lower left or upper right # N.B.: distance to origin is approximately symmetric considering the # whole curve # (x, y) -> (x_low, y_low) lower_x, lower_y = _ellipse_intersect( numpy.abs(x_low - x), # width of ellipse numpy.abs(y_low - y), # height of ellipse x, # width of reference vector (only angle matters) y, # height of reference vector (only angle matters) "bl", ) # (x, y) -> (x_high, y_high) upper_x, upper_y = _ellipse_intersect( numpy.abs(x_high - x), # width of ellipse numpy.abs(y_high - y), # height of ellipse x, # width of reference vector (only angle matters) y, # height of reference vector (only angle matters) "tr", ) else: # sectors are lower right or upper left # N.B.: angles must be taken with respect to (1,0) and not (0,0) as the # curve is facing the other direction. # (x, y) -> (x_high, y_low) lower_x, lower_y = _ellipse_intersect( numpy.abs(x_high - x), # width of ellipse numpy.abs(y_low - y), # height of ellipse 1 - x, # width of reference vector (only angle matters) y, # height of reference vector (only angle matters) "br", ) # (x, y) -> (x_low, y_high) upper_x, upper_y = _ellipse_intersect( numpy.abs(x_low - x), # width of ellipse numpy.abs(y_high - y), # height of ellipse 1 - x, # width of reference vector (only angle matters) y, # height of reference vector (only angle matters) "tl", ) return (lower_x, lower_y), (upper_x, upper_y)
[docs] def area_under_the_curve( curve: tuple[ typing.Sequence[float] | numpy.typing.NDArray[numpy.double], typing.Sequence[float] | numpy.typing.NDArray[numpy.double], ], ) -> float: """Calculate the area under a curve using a trapezoidal rule. Parameters ---------- curve A tuple with 2 1D sequences of floating point numbers representing the first and second coordinate of the curve whose you want to evaluate AUC for. Returns ------- The area under the curve (floating point scalar). """ return numpy.abs(numpy.trapezoid(curve[1], curve[0])) # type: ignore[reportReturnType]
[docs] def average_metric( curve: tuple[ typing.Sequence[float] | numpy.typing.NDArray[numpy.double], typing.Sequence[float] | numpy.typing.NDArray[numpy.double], ], ) -> float: r"""Calculate the area under a curve using a rectangle rule. Typically used to calculate the average precision (AP) as in: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision .. math:: \text{AP} &= \sum_n (R_n - R_{n-1}) P_n \\ \text{AP} &= \sum_n (curve1[n] - curve1[n-1]) curve0[n] According to the scikit-learn documentation for :py:func:`sklearn.metrics.average_precision_score`, this implementation *"is different from computing the area under the precision-recall curve with the trapezoidal rule, which uses linear interpolation and can be too optimistic"*. .. note:: Due to differences in the way this package computes the precision-recall curve (does not add an extra (1.0, 0.0) at the end of the PR curve like sklearn does, see documentation for :py:func:`sklearn.metrics.precision_recall_curve`), we compensate this here. Parameters ---------- curve A tuple with 2 1D sequences of floating point numbers representing the first and second coordinate of the curve whose you want to evaluate AUC for. Returns ------- The area under the curve (floating point scalar). """ return -sum(numpy.diff(numpy.append(curve[1], 0.0)) * curve[0])
[docs] def estimated_ci_coverage( ci_functor: CIArrayFunctor, rng: numpy.random.Generator, n: int = 100, ) -> numpy.typing.NDArray[numpy.double]: """Return the approximate coverage of a credible region or confidence interval estimator. Reference: `This blog post <ci-evaluation_>`_. Parameters ---------- ci_functor : object A callable that accepts ``k``, the number of successes (1D integer numpy.ndarray), ``l`` (1D integer numpy.ndarray), the number of failures to account for in the estimation of the interval/region. This function must return two float parameters only corresponding to the lower and upper bounds of the credible region or confidence interval being estimated. rng An initialized numpy random number generator. n The number of bernoulli trials to consider on the binomial distribution. This represents the total number of samples you'd have for your experiment. Returns ------- The actual coverage curve, you can expect. The first row corresponds to the values of ``p`` that were probed. The second row, the actual coverage considering a simulated binomial distribution with size ``n``. """ coverage = [] size = 10000 # how many experiments to do at each try r = numpy.arange(1 / n, 1.0, step=1 / n) for p in r: k = rng.binomial(n=n, p=p, size=size) _, lower, upper = ci_functor(k, n - k) covered = numpy.asarray((lower < p) & (p < upper), dtype=float) coverage.append(covered.mean()) return numpy.vstack((r, numpy.asarray(coverage, dtype=float)))