# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Implementation of :py:mod:`Scikit-Learn compatible measures
<sklearn.metrics>` with bayesian credible regions for k-folding experiments.
"""
import typing
import numpy
import numpy.typing
import sklearn.metrics
from . import utils
from .metrics import NUMBER_MC_SAMPLES
[docs]
def precision_score(
y_true: typing.Iterable[typing.Iterable[int]],
y_pred: typing.Iterable[typing.Iterable[int]],
rng: numpy.random.Generator,
lambda_: float = 1.0,
coverage: float = 0.95,
nb_samples: int = NUMBER_MC_SAMPLES,
) -> tuple[float, float, float, float]:
r"""Precision **binary** classification score.
AKA positive predictive value (PPV), mean, mode and credible intervals. It
corresponds arithmetically to ``tp/(tp+fp)``. This function only supports
**binary** classification problems.
Parameters
----------
y_true
Ground truth (correct) labels.
y_pred
Predicted labels, as returned by a classifier.
rng
An initialized numpy random number generator.
lambda_
The parameterisation of the Beta prior to consider. Use
:math:`\lambda=1` for a flat prior. Use :math:`\lambda=0.5` for
Jeffrey's prior. Changes in this value do not significantly affect the
outcome, unless ``tp`` or ``fp`` are very small (close to 1).
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% of the area under
the probability density of the posterior is covered by the returned
equal-tailed interval.
nb_samples
Number of generated variates for the M-C simulation.
Returns
-------
A tuple with 4 floating-point numbers:
* The average precision, as would be returned by scikit-learn
* The mode of the posterior distribution
* The lower value of the credible region/confidence interval
* The upper value of the credible region/confidence interval
"""
cms = numpy.asarray(
[
sklearn.metrics.confusion_matrix(i, j).ravel()
for (i, j) in zip(y_true, y_pred)
],
dtype=numpy.int_,
)
fp_array = cms[:, 1]
tp_array = cms[:, 3]
_, mode, lower, upper = utils.average_beta(
tp_array, fp_array, lambda_, coverage, nb_samples, rng
)
return (
numpy.mean(tp_array / (tp_array + fp_array)).item(),
mode,
lower,
upper,
)
[docs]
def recall_score(
y_true: typing.Iterable[typing.Iterable[int]],
y_pred: typing.Iterable[typing.Iterable[int]],
rng: numpy.random.Generator,
lambda_: float = 1.0,
coverage: float = 0.95,
nb_samples: int = NUMBER_MC_SAMPLES,
) -> tuple[float, float, float, float]:
r"""Recall **binary** classification score.
AKA sensitivity, hit rate, or true positive rate (TPR), mean, mode and
credible intervals. It corresponds arithmetically to ``tp/(tp+fn)``.
Parameters
----------
y_true
Ground truth (correct) labels.
y_pred
Predicted labels, as returned by a classifier.
rng
An initialized numpy random number generator.
lambda_
The parameterisation of the Beta prior to consider. Use
:math:`\lambda=1` for a flat prior. Use :math:`\lambda=0.5` for
Jeffrey's prior. Changes in this value do not significantly affect the
outcome, unless ``tp`` or ``fp`` are very small (close to 1).
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% of the area under
the probability density of the posterior is covered by the returned
equal-tailed interval.
nb_samples
Number of generated variates for the M-C simulation.
Returns
-------
A tuple with 4 floating-point numbers:
* The average recall, as would be returned by scikit-learn
* The mode of the posterior distribution
* The lower value of the credible region/confidence interval
* The upper value of the credible region/confidence interval
"""
cms = numpy.asarray(
[
sklearn.metrics.confusion_matrix(i, j).ravel()
for (i, j) in zip(y_true, y_pred)
],
dtype=numpy.int_,
)
fn_array = cms[:, 2]
tp_array = cms[:, 3]
_, mode, lower, upper = utils.average_beta(
tp_array, fn_array, lambda_, coverage, nb_samples, rng
)
return (
numpy.mean(tp_array / (tp_array + fn_array)).item(),
mode,
lower,
upper,
)
[docs]
def specificity_score(
y_true: typing.Iterable[typing.Iterable[int]],
y_pred: typing.Iterable[typing.Iterable[int]],
rng: numpy.random.Generator,
lambda_: float = 1.0,
coverage: float = 0.95,
nb_samples: int = NUMBER_MC_SAMPLES,
) -> tuple[float, float, float, float]:
r"""Specificity **binary** classification score.
AKA selectivity or true negative rate (TNR), mean, mode and credible
intervals. It corresponds arithmetically to ``tn/(tn+fp)``.
Parameters
----------
y_true
Ground truth (correct) labels.
y_pred
Predicted labels, as returned by a classifier.
rng
An initialized numpy random number generator.
lambda_
The parameterisation of the Beta prior to consider. Use
:math:`\lambda=1` for a flat prior. Use :math:`\lambda=0.5` for
Jeffrey's prior. Changes in this value do not significantly affect the
outcome, unless ``tp`` or ``fp`` are very small (close to 1).
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% of the area under
the probability density of the posterior is covered by the returned
equal-tailed interval.
nb_samples
Number of generated variates for the M-C simulation.
Returns
-------
A tuple with 4 floating-point numbers:
* The average specificity, as would be returned by scikit-learn
* The mode of the posterior distribution
* The lower value of the credible region/confidence interval
* The upper value of the credible region/confidence interval
"""
cms = numpy.asarray(
[
sklearn.metrics.confusion_matrix(i, j).ravel()
for (i, j) in zip(y_true, y_pred)
],
dtype=numpy.int_,
)
tn_array = cms[:, 0]
fp_array = cms[:, 1]
_, mode, lower, upper = utils.average_beta(
tn_array, fp_array, lambda_, coverage, nb_samples, rng
)
return (
numpy.mean(tn_array / (tn_array + fp_array)).item(),
mode,
lower,
upper,
)
[docs]
def accuracy_score(
y_true: typing.Iterable[typing.Iterable[int]],
y_pred: typing.Iterable[typing.Iterable[int]],
rng: numpy.random.Generator,
lambda_: float = 1.0,
coverage: float = 0.95,
nb_samples: int = NUMBER_MC_SAMPLES,
) -> tuple[float, float, float, float]:
r"""Accuracy **binary** classification score.
See `Accuracy
<https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is the
proportion of correct predictions (both true positives and true negatives)
among the total number of pixels examined. It corresponds arithmetically
to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes both true-negatives
and positives in the numerator, what makes it sensitive to data or regions
without annotations. AKA selectivity or true negative rate (TNR), mean,
mode and credible intervals. It corresponds arithmetically to
``tn/(tn+fp)``.
Parameters
----------
y_true
Ground truth (correct) labels.
y_pred
Predicted labels, as returned by a classifier.
rng
An initialized numpy random number generator.
lambda_
The parameterisation of the Beta prior to consider. Use
:math:`\lambda=1` for a flat prior. Use :math:`\lambda=0.5` for
Jeffrey's prior. Changes in this value do not significantly affect the
outcome, unless ``tp`` or ``fp`` are very small (close to 1).
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% of the area under
the probability density of the posterior is covered by the returned
equal-tailed interval.
nb_samples
Number of generated variates for the M-C simulation.
Returns
-------
A tuple with 4 floating-point numbers:
* The average accuracy, as would be returned by scikit-learn
* The mode of the posterior distribution
* The lower value of the credible region/confidence interval
* The upper value of the credible region/confidence interval
"""
cms = numpy.asarray(
[
sklearn.metrics.confusion_matrix(i, j).ravel()
for (i, j) in zip(y_true, y_pred)
],
dtype=numpy.int_,
)
tn_array = cms[:, 0]
fp_array = cms[:, 1]
fn_array = cms[:, 2]
tp_array = cms[:, 3]
_, mode, lower, upper = utils.average_beta(
tp_array + tn_array,
fn_array + fp_array,
lambda_,
coverage,
nb_samples,
rng,
)
return (
numpy.mean(
(tp_array + tn_array) / (tn_array + fp_array + fn_array + tp_array)
).item(),
mode,
lower,
upper,
)
[docs]
def jaccard_score(
y_true: typing.Iterable[typing.Iterable[int]],
y_pred: typing.Iterable[typing.Iterable[int]],
rng: numpy.random.Generator,
lambda_: float = 1.0,
coverage: float = 0.95,
nb_samples: int = NUMBER_MC_SAMPLES,
) -> tuple[float, float, float, float]:
r"""Jaccard **binary** classification score.
See `Jaccard Index or Similarity
<https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds
arithmetically to ``tp/(tp+fp+fn)``. The Jaccard index depends on a
TP-only numerator, similarly to the F1 score. For regions where there are
no annotations, the Jaccard index will always be zero, irrespective of the
model output. Accuracy may be a better proxy if one needs to consider the
true abscence of annotations in a region as part of the measure.
Parameters
----------
y_true
Ground truth (correct) labels.
y_pred
Predicted labels, as returned by a classifier.
rng
An initialized numpy random number generator.
lambda_
The parameterisation of the Beta prior to consider. Use
:math:`\lambda=1` for a flat prior. Use :math:`\lambda=0.5` for
Jeffrey's prior. Changes in this value do not significantly affect the
outcome, unless ``tp`` or ``fp`` are very small (close to 1).
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you're expecting. A value of 0.95 will ensure 95% of the area under
the probability density of the posterior is covered by the returned
equal-tailed interval.
nb_samples
Number of generated variates for the M-C simulation.
Returns
-------
A tuple with 4 floating-point numbers:
* The average jaccard score, as would be returned by scikit-learn
* The mode of the posterior distribution
* The lower value of the credible region/confidence interval
* The upper value of the credible region/confidence interval
"""
cms = numpy.asarray(
[
sklearn.metrics.confusion_matrix(i, j).ravel()
for (i, j) in zip(y_true, y_pred)
],
dtype=numpy.int_,
)
fp_array = cms[:, 1]
fn_array = cms[:, 2]
tp_array = cms[:, 3]
_, mode, lower, upper = utils.average_beta(
tp_array,
fp_array + fn_array,
lambda_,
coverage,
nb_samples,
rng,
)
return (
numpy.mean(tp_array / (tp_array + fn_array + fp_array)).item(),
mode,
lower,
upper,
)
[docs]
def f1_score(
y_true: typing.Iterable[typing.Iterable[int]],
y_pred: typing.Iterable[typing.Iterable[int]],
rng: numpy.random.Generator,
lambda_: float = 1.0,
coverage: float = 0.95,
nb_samples: int = NUMBER_MC_SAMPLES,
) -> tuple[float, float, float, float]:
r"""Return the mean, mode, upper and lower bounds of the credible region of
the F1 score.
See `F1-score <https://en.wikipedia.org/wiki/F1_score>`_. It corresponds
arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. The F1 or Dice
score depends on a TP-only numerator, similarly to the Jaccard index. For
regions where there are no annotations, the F1-score will always be zero,
irrespective of the model output. Accuracy may be a better proxy if one
needs to consider the true abscence of annotations in a region as part of
the measure.
This implementation is based on [GOUTTE-2005]_.
Parameters
----------
y_true
Ground truth (correct) labels.
y_pred
Predicted labels, as returned by a classifier.
rng
An initialized numpy random number generator.
lambda_
The parameterisation of the Beta prior to consider. Use
:math:`\lambda=1` for a flat prior. Use :math:`\lambda=0.5` for
Jeffrey's prior.
coverage
A floating-point number between 0 and 1.0 indicating the coverage
you are expecting. A value of 0.95 will ensure 95% of the area under
the probability density of the posterior is covered by the returned
equal-tailed interval.
nb_samples
Number of generated variates for the M-C simulation.
Returns
-------
A tuple with 4 floating-point numbers:
* The average F1-score, as would be returned by scikit-learn
* The mode of the posterior distribution
* The lower value of the credible region/confidence interval
* The upper value of the credible region/confidence interval
"""
cms = numpy.asarray(
[
sklearn.metrics.confusion_matrix(i, j).ravel()
for (i, j) in zip(y_true, y_pred)
],
dtype=numpy.int_,
)
fp_array = cms[:, 1]
fn_array = cms[:, 2]
tp_array = cms[:, 3]
variates = utils.average_f1_posterior(
tp_array, fp_array, fn_array, lambda_, nb_samples, rng
)
_, mode, lower, upper = utils.evaluate_statistics(
variates, coverage, bins="auto"
)
return (
numpy.mean(
(2 * tp_array) / ((2 * tp_array) + fn_array + fp_array)
).item(),
mode,
lower,
upper,
)