Source code for score_analysis.roc_curve

"""
This module contains ROC curve calculations, including confidence bands for ROC curves.
"""

from __future__ import annotations

import math
from dataclasses import dataclass
from typing import Optional

import numpy as np
from numpy.typing import ArrayLike

from .scores import DEFAULT_BOOTSTRAP_CONFIG, BinaryLabel, BootstrapConfig, Scores

ROC_CI_EXTRA_POINTS = 20  # Number of points added for ROC confidence band computations


[docs]@dataclass
class ROCCurve:
    """
    Class to hold the values and thresholds for an ROC curve, optionally with
    confidence bands.

    Args:
        fnr: False Negative Rate.
        fpr: False Positive Rate.
        thresholds: Thresholds.
        fnr_ci: (N, 2) array with FNR confidence bands.
        fpr_ci: (N, 2) array with FPR confidence bands.
    """

    fnr: np.ndarray
    fpr: np.ndarray
    thresholds: np.ndarray
    fnr_ci: Optional[np.ndarray] = None
    fpr_ci: Optional[np.ndarray] = None

    @property
    def tpr(self):
        """True Positive Rate"""
        return 1.0 - self.fnr

    @property
    def tnr(self):
        """True Negative Rate"""
        return 1.0 - self.fpr

    @property
    def frr(self):
        """False Rejection Rate (alias for FNR)"""
        return self.fnr

    @property
    def far(self):
        """False Acceptance Rate (alias for FPR)"""
        return self.fpr

    @property
    def tar(self):
        """True Acceptance Rate (alias for TPR)"""
        return self.tpr

    @property
    def trr(self):
        """True Rejection Rate (alias for TNR)"""
        return self.tnr

    @property
    def tpr_ci(self):
        """TPR CIs"""
        return None if self.fnr_ci is None else np.copy(1.0 - self.fnr_ci[..., ::-1])

    @property
    def tnr_ci(self):
        """TNR CIs"""
        return None if self.fpr_ci is None else np.copy(1.0 - self.fpr_ci[..., ::-1])

    @property
    def frr_ci(self):
        """FRR CIs (alias for FNR CIs)"""
        return self.fnr_ci

    @property
    def far_ci(self):
        """FAR CIs (alias for FPR CIs)"""
        return self.fpr_ci

    @property
    def tar_ci(self):
        """TAR CIs (Alias for FPR CIs)"""
        return self.tpr_ci

    @property
    def trr_ci(self):
        """TRR CIs (alias for TNR CIs)"""
        return self.tnr_ci


[docs]def roc(
    scores: Scores,
    *,
    fnr: Optional[ArrayLike] = None,
    fpr: Optional[ArrayLike] = None,
    thresholds: Optional[ArrayLike] = None,
    nb_points: Optional[int] = 100,
    x_axis: str = "fpr",
) -> ROCCurve:
    """
    Computes the ROC curve at the given FNR, FPR or threshold values.

    We can provide the FNR, FPR and threshold values at which to compute the ROC
    curve. We will use the union of these points for the ROC curve.

    If neither FNR, FPR, nor threshold values are provided, if ``nb_points`` is set,
    we use linearly spaced points between the smallest positive and negative scores.

    If ``nb_points`` is None, we use *all* positive and negative scores for the
    ROC curve. This gives the highest accuracy, but can be compute-intensive if we
    are working with large amount of scores.

    Args:
        scores: Scores for which to compute ROC curve.
        fnr: FNR points at which to compute the ROC curve.
        fpr: FPR points at which to compute the ROC curve.
        thresholds: Thresholds at which to compute the ROC curve.
        nb_points: Number of linearly spaced points to use, if neither one of FNR, FPR,
            nor thresholds are provided. If None, we use all scores for the ROC curve.
        x_axis: The values for x-axis metric will be increasing.

    Returns:
        A ROCCurve object with points on the ROC curve and the corresponding thresholds.
    """
    thresholds = _find_support_thresholds(
        scores, fnr, fpr, thresholds, nb_points, None, x_axis
    )

    fnr = scores.fnr(thresholds)
    fpr = scores.fpr(thresholds)

    return ROCCurve(fnr=fnr, fpr=fpr, thresholds=thresholds)


[docs]def roc_with_ci(
    scores: Scores,
    *,
    fnr: Optional[ArrayLike] = None,
    fpr: Optional[ArrayLike] = None,
    thresholds: Optional[ArrayLike] = None,
    nb_points: Optional[int] = None,
    x_axis: str = "fpr",
    alpha: float = 0.05,
    config: BootstrapConfig = DEFAULT_BOOTSTRAP_CONFIG,
) -> ROCCurve:
    """
    Function to compute the confidence band around an ROC curve.

    We can provide the FNR, FPR and threshold values at which to compute the ROC
    curve. We will use the union of these points for the ROC curve.

    If neither FNR, FPR, nor threshold values are provided, if ``nb_points`` is set,
    we use linearly spaced points between the smallest positive and negative scores.

    If ``nb_points`` is None, we use *all* positive and negative scores for the
    ROC curve. This gives the highest accuracy, but can be compute-intensive if we
    are working with large amount of scores.

    There are two ways to plot an ROC curve, depending on which metric (FPR or FNR) ir
    plotted on the x-axis. We expect the user to provide either FPR or FNR values (but
    not both). The provided values are plotted on the x-axis and used to set a threshold
    and then compute the other metric at those thresholds. The confidence band is
    computed using the union of confidence rectangles for both FNR and FPR.

    Args:
        scores: The Scores object whose ROC curve we compute.
        fnr: Optional FNR support points on the ROC curve.
        fpr: Optional FPR support points on the ROC curve.
        thresholds: Optional threshold support points on the ROC curve.
        nb_points: If at least one of fnr, fpr and threshold is provided, this is the
            additional number of support points beyond the range of the specified
            points. If none of fnr, fpr or threshold are provided, we use this number
            of support points linearly spaced along the FNR and FPR axes.
        x_axis: The values for x-axis metric will be increasing.
        alpha: Significance level. In range (0, 1).
        config: Bootstrap config.

    Returns:
        ROCCurve object with point values for the ROC curve and the lower and upper
        bounds of the confidence band values for both metric values.
    """
    thresholds = _find_support_thresholds(
        scores,
        fnr,
        fpr,
        thresholds,
        nb_points,
        ROC_CI_EXTRA_POINTS,
        x_axis,
    )
    fnr = scores.fnr(thresholds)
    fpr = scores.fpr(thresholds)

    # Calculate bootstrap CI
    def _metric(_scores: Scores):
        _fnr = _scores.fnr(_scores.threshold_at_fpr(fpr))
        _fpr = _scores.fpr(_scores.threshold_at_fnr(fnr))
        return np.stack([_fnr, _fpr], axis=0)

    joint_ci = scores.bootstrap_ci(metric=_metric, alpha=alpha, config=config)
    fnr_ci = joint_ci[0]
    fpr_ci = joint_ci[1]

    # Rule-of-three correction for FNR and FPR being 0. or 1.
    fnr_ci = _apply_rule_of_three(p=fnr, ci=fnr_ci, alpha=alpha, n=len(scores.pos))
    fpr_ci = _apply_rule_of_three(p=fpr, ci=fpr_ci, alpha=alpha, n=len(scores.neg))

    # Here the magic happens, and we aggregate 1D CIs to a 2D confidence band.
    fpr_band = _aggregate_rectangles(fnr, fnr_ci, fpr_ci)
    fnr_band = _aggregate_rectangles(fpr, fpr_ci, fnr_ci)

    return ROCCurve(
        fnr=fnr, fpr=fpr, thresholds=thresholds, fnr_ci=fnr_band, fpr_ci=fpr_band
    )


def _find_support_thresholds(
    scores: Scores,
    fnr: Optional[np.ndarray],
    fpr: Optional[np.ndarray],
    thresholds: Optional[np.ndarray],
    nb_points: Optional[int],
    nb_extra_points: Optional[int],
    x_axis: str,
) -> np.ndarray:
    """
    This function contains the logic for combining the user-provided FNR, FPR, and
    threshold values, together with nb_points to one list of thresholds at which
    we will evaluate the ROC curve.

    If one of FNR, FPR or thresholds is provided, we use the union of thresholds
    corresponding to these values. In that case nb_points is ignored.

    If FNR, FPR and thresholds are all None, we use thresholds corresponding to
    linearly spaced points along FNR and FPR axes. To be precise, we use nb_points // 2
    points along the FNR axis and the same along the FPR axis.
    """
    if nb_extra_points is not None:
        # We subtract 4 to make space for thresholds beyond the pos and neg score
        # limits; see np.nextafter calls below.
        nb_extra_fnr_points = (nb_extra_points - 4) // 2
        nb_extra_fpr_points = nb_extra_points - 4 - nb_extra_fnr_points
    else:
        nb_extra_fnr_points = 0
        nb_extra_fpr_points = 0

    if thresholds is None:
        thresholds = np.zeros(shape=(0,))
    if fnr is not None:
        fnr_thresholds = scores.threshold_at_fnr(fnr)
        thresholds = np.concatenate([thresholds, fnr_thresholds])
    if fpr is not None:
        fpr_thresholds = scores.threshold_at_fpr(fpr)
        thresholds = np.concatenate([thresholds, fpr_thresholds])
    if len(thresholds) == 0:
        if nb_points is None:
            # Use all available scores as thresholds
            thresholds = np.concatenate([scores.pos, scores.neg])
        else:
            nb_fnr_points = nb_points // 2
            nb_fpr_points = nb_points - nb_fnr_points
            default_fnr = np.linspace(0.0, 1.0, nb_fnr_points, endpoint=True)
            fnr_thresholds = scores.threshold_at_fnr(default_fnr)

            default_fpr = np.linspace(0.0, 1.0, nb_fpr_points, endpoint=True)
            fpr_thresholds = scores.threshold_at_fpr(default_fpr)
            thresholds = np.concatenate([fnr_thresholds, fpr_thresholds])

    if nb_extra_points is not None:
        thresholds = np.sort(thresholds)

        fnr_min = np.min(scores.fnr(thresholds[[0, -1]]))
        fnr_max = np.max(scores.fnr(thresholds[[0, -1]]))
        fnr_extra = _add_extra_points(fnr_min, fnr_max, nb_extra_fnr_points)
        fnr_thresholds = scores.threshold_at_fnr(fnr_extra)

        fpr_min = np.min(scores.fpr(thresholds[[0, -1]]))
        fpr_max = np.max(scores.fpr(thresholds[[0, -1]]))
        fpr_extra = _add_extra_points(fpr_min, fpr_max, nb_extra_fpr_points)
        fpr_thresholds = scores.threshold_at_fpr(fpr_extra)

        thresholds = np.concatenate([thresholds, fnr_thresholds, fpr_thresholds])
        thresholds = np.concatenate(
            [
                thresholds,
                # Add thresholds that are smaller and larger than all scores
                [np.nextafter(scores.pos[0], -np.inf)],
                [np.nextafter(scores.pos[-1], np.inf)],
                [np.nextafter(scores.neg[0], -np.inf)],
                [np.nextafter(scores.neg[-1], np.inf)],
            ]
        )

    thresholds = np.sort(thresholds)
    if x_axis not in {"fnr", "fpr", "tnr", "tpr", "far", "frr", "tar", "trr"}:
        raise ValueError(f"Unknown value for x_axis: {x_axis}.")
    if x_axis in {"fpr", "tpr", "far", "tar"}:  # Decreasing metrics if score_class=pos
        thresholds = thresholds[::-1]
    if scores.score_class == BinaryLabel.neg:
        thresholds = thresholds[::-1]

    return thresholds


def _add_extra_points(x_min, x_max, nb_points: int) -> np.ndarray:
    nb_before = nb_points // 2
    nb_after = nb_points - nb_before
    x = np.array([])
    if x_min > 0.0:
        x_before = np.linspace(0.0, x_min, num=nb_before, endpoint=False)
        x = np.concatenate([x_before, x])
    if x_max < 1.0:
        # Thresholds will be sorted later. We use the reverse order to take advantage
        # of endpoint=False to exclude x[-1].
        x_after = np.linspace(1.0, x_max, num=nb_after, endpoint=False)
        x = np.concatenate([x, x_after])
    return x


def _apply_rule_of_three(
    p: np.ndarray, ci: np.ndarray, alpha: float, n: int
) -> np.ndarray:
    """
    When FNR or FPR is 0. or 1., we cannot use bootstrapping to estimate uncertainty
    as we don't have any samples to measure probabilities smaller than 1/n. In these
    cases, we use the generalised rule-of-three approximation (when alpha=0.05, then
    the confidence interval is approx. (0, 3/n).

    See: https://en.wikipedia.org/wiki/Rule_of_three_(statistics)
    """
    lower_correction = np.array([[0.0, 1 - math.pow(alpha, 1 / n)]])
    upper_correction = np.array([[math.pow(alpha, 1 / n), 1.0]])

    ci = np.where(p[:, np.newaxis] < 1.0 / n, lower_correction, ci)
    ci = np.where(p[:, np.newaxis] > (n - 1) / n, upper_correction, ci)
    return ci


def _aggregate_rectangles(
    x: np.ndarray, dxp: np.ndarray, dyp: np.ndarray
) -> np.ndarray:
    """
    Function aggregates rectangles into one continuous band and returns
    the lower and upper limits of the band at the points given by x.

    Args:
        x: Array of shape (N, ) of points where to evaluate the band
        dxp: Array of shape (M, 2) with x-limits of rectangles
        dyp: Array of shape (M, 2) with y-limits of rectangles

    Returns:
        ci: Array of shape (N, 2) where
            * ci[:, 0] is the minimum of dyp[j, 0] for which x[i] lies in the interval
              defined by dxp[j]
            * ci[:, 1] is the maximum of dyp[j, 1] with the same condition

            The value of ci is undefined if there is no rectangle covering a point.
    """
    x = np.asarray(x)
    dxp = np.asarray(dxp)
    dyp = np.asarray(dyp)

    # The variation at x is at least as large as given by the vertical extent of the
    # rectangles. In this case the rectangles are defined at the same points as where
    # we want to evaluate them. In the more general case, we would have to do 1D
    # interpolation, i.e., lower = np.interp(x_out, x, dyp[..., 0]).
    lower = np.copy(dyp[..., 0])
    upper = np.copy(dyp[..., 1])

    for j in range(len(x)):
        inside = (dxp[:, 0] <= x[j]) & (x[j] <= dxp[:, 1])
        lower_rect = np.min(dyp[inside, 0], initial=lower[j])
        upper_rect = np.max(dyp[inside, 1], initial=upper[j])
        lower[j] = min(lower[j], lower_rect)
        upper[j] = max(upper[j], upper_rect)

    ci = np.stack([lower, upper], axis=-1)
    return ci