Skip to content

Recalibration

reliably.recalibrate.temperature.TemperatureScaler

Bases: Calibrator

Calibrate by dividing logits by a scalar temperature > 0.

Fits temperature by minimizing NLL on the calibration split using SciPy's bounded scalar minimizer (`minimize_scalar` with `method="bounded"`, a Brent-type search within `temp_bounds`). Preserves the argmax (accuracy unchanged).

Parameters:

Name Type Description Default
temp_bounds tuple[float, float]

Search bounds for temperature.

(0.01, 20.0)

Examples:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> y_true = rng.integers(0, 2, 200)
>>> y_prob = rng.dirichlet([1, 1], 200)
>>> cal = TemperatureScaler().fit(y_prob, y_true)
>>> cal.T_ > 0
True
>>> cal_probs = cal.transform(y_prob)
>>> np.allclose(cal_probs.sum(axis=1), 1.0, atol=1e-6)
True
Source code in src/reliably/recalibrate/temperature.py
class TemperatureScaler(Calibrator):
    """Calibrate by dividing log-probabilities by a scalar temperature > 0.

    The temperature is fitted by minimizing the mean negative log-likelihood
    (NLL) on the calibration split with SciPy's bounded scalar minimizer
    (``minimize_scalar(..., method="bounded")``, a Brent-type search
    restricted to ``temp_bounds``).  Preserves the argmax (accuracy
    unchanged).

    Parameters
    ----------
    temp_bounds : tuple[float, float]
        ``(lower, upper)`` search bounds for temperature.  Stored on the
        instance as ``T_bounds``.

    Attributes
    ----------
    T_ : float
        Fitted temperature, set by :meth:`fit`.
    logits_ : NDArray[np.float64]
        Pseudo-logits ``log(clip_probs(y_prob))`` of the calibration split,
        set by :meth:`fit`.

    Examples
    --------
    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y_true = rng.integers(0, 2, 200)
    >>> y_prob = rng.dirichlet([1, 1], 200)
    >>> cal = TemperatureScaler().fit(y_prob, y_true)
    >>> cal.T_ > 0
    True
    >>> cal_probs = cal.transform(y_prob)
    >>> np.allclose(cal_probs.sum(axis=1), 1.0, atol=1e-6)
    True
    """

    T_: float
    logits_: NDArray[np.float64]

    def __init__(self, temp_bounds: tuple[float, float] = (0.01, 20.0)) -> None:
        # NOTE: the attribute is named ``T_bounds`` rather than
        # ``temp_bounds``; the name is kept so existing readers keep working.
        self.T_bounds = temp_bounds

    def fit(self, y_prob: Any, y_true: Any) -> TemperatureScaler:
        """Fit temperature on calibration data.

        Parameters
        ----------
        y_prob : array-like
            Predicted probabilities, shape ``(N, K)`` or ``(N,)`` (binary).
            Values are passed through ``clip_probs`` and ``np.log`` to form
            pseudo-logits, so raw logits are not used as-is.
        y_true : array-like
            Integer labels, one per row of ``y_prob``.

        Returns
        -------
        TemperatureScaler
            ``self``, with ``T_`` and ``logits_`` set.

        Raises
        ------
        ValueError
            If ``y_prob`` and ``y_true`` have a different number of samples.
        """
        y_prob_np = to_numpy(y_prob, dtype=np.float64)
        y_true_np = to_numpy(y_true, dtype=np.float64).astype(np.int64)
        n = len(y_true_np)

        if y_prob_np.ndim == 1:
            # Binary: convert to 2-class
            y_prob_np = np.stack([1.0 - y_prob_np, y_prob_np], axis=1)

        # Without this check, extra rows in y_prob would be silently ignored
        # by the fancy indexing below (only the first n rows are used).
        if y_prob_np.shape[0] != n:
            raise ValueError(
                f"y_prob has {y_prob_np.shape[0]} samples but y_true has {n}."
            )

        # Recover pseudo-logits as log(p) — up to a constant, sufficient for
        # temperature scaling because softmax is shift-invariant
        p_clipped = clip_probs(y_prob_np)
        logits = np.log(p_clipped)
        self.logits_ = logits

        def nll_at_temp(temp: float) -> float:
            """Mean NLL of the true labels at temperature ``temp``."""
            probs = softmax(logits / temp)
            p_correct = clip_probs(probs[np.arange(n), y_true_np])
            return float(-np.log(p_correct).mean())

        # method="bounded" keeps the search inside T_bounds.
        result = minimize_scalar(nll_at_temp, bounds=self.T_bounds, method="bounded")
        self.T_ = float(result.x)
        self._fitted = True
        return self

    def transform(self, y_prob: Any) -> NDArray[np.float64]:
        """Apply temperature scaling.

        Parameters
        ----------
        y_prob : array-like
            Probabilities to calibrate, 2-D or 1-D (binary).

        Returns
        -------
        NDArray[np.float64]
            Calibrated probabilities.  For 1-D input only the second
            (positive-class) column is returned.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform().")
        y_prob_np = to_numpy(y_prob, dtype=np.float64)
        binary = y_prob_np.ndim == 1
        if binary:
            # Expand to two columns so the same softmax path is used.
            y_prob_np = np.stack([1.0 - y_prob_np, y_prob_np], axis=1)

        p_clipped = clip_probs(y_prob_np)
        logits = np.log(p_clipped)
        cal = softmax(logits / self.T_)
        if binary:
            return cal[:, 1]
        return cal

fit(y_prob, y_true)

Fit temperature on calibration data.

Parameters:

Name Type Description Default
y_prob array-like

Probabilities or logits, shape (N, K) or (N,) (binary).

required
y_true array-like

Integer labels.

required

Returns:

Type Description
TemperatureScaler
Source code in src/reliably/recalibrate/temperature.py
def fit(self, y_prob: Any, y_true: Any) -> TemperatureScaler:
    """Fit temperature on calibration data.

    The temperature is found by minimizing the mean negative
    log-likelihood with ``minimize_scalar(..., method="bounded")`` inside
    ``self.T_bounds``.

    Parameters
    ----------
    y_prob : array-like
        Predicted probabilities, shape ``(N, K)`` or ``(N,)`` (binary).
        Values are passed through ``clip_probs`` and ``np.log`` to form
        pseudo-logits, so raw logits are not used as-is.
    y_true : array-like
        Integer labels, one per row of ``y_prob``.

    Returns
    -------
    TemperatureScaler
        ``self``, with ``T_`` and ``logits_`` set.

    Raises
    ------
    ValueError
        If ``y_prob`` and ``y_true`` have a different number of samples.
    """
    y_prob_np = to_numpy(y_prob, dtype=np.float64)
    y_true_np = to_numpy(y_true, dtype=np.float64).astype(np.int64)
    n = len(y_true_np)

    if y_prob_np.ndim == 1:
        # Binary: convert to 2-class
        y_prob_np = np.stack([1.0 - y_prob_np, y_prob_np], axis=1)

    # Without this check, extra rows in y_prob would be silently ignored
    # by the fancy indexing below (only the first n rows are used).
    if y_prob_np.shape[0] != n:
        raise ValueError(
            f"y_prob has {y_prob_np.shape[0]} samples but y_true has {n}."
        )

    # Recover pseudo-logits as log(p) — up to a constant, sufficient for
    # temperature scaling because softmax is shift-invariant
    p_clipped = clip_probs(y_prob_np)
    logits = np.log(p_clipped)
    self.logits_ = logits

    def nll_at_temp(temp: float) -> float:
        """Mean NLL of the true labels at temperature ``temp``."""
        probs = softmax(logits / temp)
        p_correct = clip_probs(probs[np.arange(n), y_true_np])
        return float(-np.log(p_correct).mean())

    # method="bounded" keeps the search inside T_bounds.
    result = minimize_scalar(nll_at_temp, bounds=self.T_bounds, method="bounded")
    self.T_ = float(result.x)
    self._fitted = True
    return self

transform(y_prob)

Apply temperature scaling.

Parameters:

Name Type Description Default
y_prob array-like

Probabilities to calibrate.

required

Returns:

Type Description
NDArray[float64]

Calibrated probabilities.

Source code in src/reliably/recalibrate/temperature.py
def transform(self, y_prob: Any) -> NDArray[np.float64]:
    """Rescale probabilities with the fitted temperature ``T_``.

    Parameters
    ----------
    y_prob : array-like
        Probabilities to calibrate, 2-D or 1-D (binary).

    Returns
    -------
    NDArray[np.float64]
        Calibrated probabilities.  For 1-D input only the second
        (positive-class) column is returned.

    Raises
    ------
    RuntimeError
        If called before ``fit()``.
    """
    if not self._fitted:
        raise RuntimeError("Call fit() before transform().")

    scores = to_numpy(y_prob, dtype=np.float64)
    is_binary = scores.ndim == 1
    if is_binary:
        # Expand to [P(neg), P(pos)] so the same softmax path is used.
        scores = np.stack([1.0 - scores, scores], axis=1)

    pseudo_logits = np.log(clip_probs(scores))
    calibrated = softmax(pseudo_logits / self.T_)
    return calibrated[:, 1] if is_binary else calibrated

reliably.recalibrate.platt.PlattScaler

Bases: Calibrator

Binary calibration via logistic regression: p_cal = σ(A·s + B).

Parameters:

This calibrator takes no constructor parameters.

Examples:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> y = rng.integers(0, 2, 300)
>>> s = rng.uniform(0, 1, 300)
>>> cal = PlattScaler().fit(s, y)
>>> probs = cal.transform(s)
>>> probs.shape == s.shape
True
Source code in src/reliably/recalibrate/platt.py
class PlattScaler(Calibrator):
    """Binary calibration via logistic regression: ``p_cal = σ(A·s + B)``.

    The slope ``A_`` and intercept ``B_`` are fitted by minimizing the
    summed binary negative log-likelihood with L-BFGS-B, starting from
    ``A = 1, B = 0``.

    Parameters
    ----------
    None

    Examples
    --------
    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y = rng.integers(0, 2, 300)
    >>> s = rng.uniform(0, 1, 300)
    >>> cal = PlattScaler().fit(s, y)
    >>> probs = cal.transform(s)
    >>> probs.shape == s.shape
    True
    """

    A_: float
    B_: float

    def fit(self, y_prob: Any, y_true: Any) -> PlattScaler:
        """Estimate ``A_`` and ``B_`` on the calibration split.

        Parameters
        ----------
        y_prob : array-like
            Binary scores, shape ``(N,)``.  For 2-D input, column 1 is used.
        y_true : array-like
            Binary labels.

        Returns
        -------
        PlattScaler
            ``self``, with ``A_`` and ``B_`` set.
        """
        scores = to_numpy(y_prob, dtype=np.float64)
        labels = to_numpy(y_true, dtype=np.float64)
        if scores.ndim == 2:
            scores = scores[:, 1]

        def neg_log_lik(params: NDArray[np.float64]) -> float:
            """Summed binary NLL for ``params = (slope, intercept)``."""
            slope, intercept = params
            # Clip away from 0/1 so both logs stay finite.
            p_pos = np.clip(expit(slope * scores + intercept), 1e-12, 1.0 - 1e-12)
            log_lik = labels * np.log(p_pos) + (1.0 - labels) * np.log(1.0 - p_pos)
            return float(-np.sum(log_lik))

        optimum = minimize(neg_log_lik, [1.0, 0.0], method="L-BFGS-B")
        self.A_ = float(optimum.x[0])
        self.B_ = float(optimum.x[1])
        self._fitted = True
        return self

    def transform(self, y_prob: Any) -> NDArray[np.float64]:
        """Map scores through the fitted sigmoid ``σ(A_·s + B_)``.

        Parameters
        ----------
        y_prob : array-like
            Binary scores.  For 2-D input, column 1 is used.

        Returns
        -------
        NDArray[np.float64]
            Calibrated probabilities.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform().")
        scores = to_numpy(y_prob, dtype=np.float64)
        if scores.ndim == 2:
            scores = scores[:, 1]
        calibrated = expit(self.A_ * scores + self.B_)
        return np.array(calibrated, dtype=np.float64)

fit(y_prob, y_true)

Fit logistic regression on calibration split.

Parameters:

Name Type Description Default
y_prob array-like

Binary scores, shape (N,).

required
y_true array-like

Binary labels.

required

Returns:

Type Description
PlattScaler
Source code in src/reliably/recalibrate/platt.py
def fit(self, y_prob: Any, y_true: Any) -> PlattScaler:
    """Estimate ``A_`` and ``B_`` on the calibration split.

    Minimizes the summed binary negative log-likelihood of
    ``σ(A·s + B)`` with L-BFGS-B, starting from ``A = 1, B = 0``.

    Parameters
    ----------
    y_prob : array-like
        Binary scores, shape ``(N,)``.  For 2-D input, column 1 is used.
    y_true : array-like
        Binary labels.

    Returns
    -------
    PlattScaler
        ``self``, with ``A_`` and ``B_`` set.
    """
    scores = to_numpy(y_prob, dtype=np.float64)
    labels = to_numpy(y_true, dtype=np.float64)
    if scores.ndim == 2:
        scores = scores[:, 1]

    def neg_log_lik(params: NDArray[np.float64]) -> float:
        """Summed binary NLL for ``params = (slope, intercept)``."""
        slope, intercept = params
        # Clip away from 0/1 so both logs stay finite.
        p_pos = np.clip(expit(slope * scores + intercept), 1e-12, 1.0 - 1e-12)
        log_lik = labels * np.log(p_pos) + (1.0 - labels) * np.log(1.0 - p_pos)
        return float(-np.sum(log_lik))

    optimum = minimize(neg_log_lik, [1.0, 0.0], method="L-BFGS-B")
    self.A_ = float(optimum.x[0])
    self.B_ = float(optimum.x[1])
    self._fitted = True
    return self

transform(y_prob)

Apply Platt scaling.

Parameters:

Name Type Description Default
y_prob array-like

Binary scores.

required

Returns:

Type Description
NDArray[float64]

Calibrated probabilities.

Source code in src/reliably/recalibrate/platt.py
def transform(self, y_prob: Any) -> NDArray[np.float64]:
    """Map scores through the fitted sigmoid ``σ(A_·s + B_)``.

    Parameters
    ----------
    y_prob : array-like
        Binary scores.  For 2-D input, column 1 is used.

    Returns
    -------
    NDArray[np.float64]
        Calibrated probabilities.

    Raises
    ------
    RuntimeError
        If called before ``fit()``.
    """
    if not self._fitted:
        raise RuntimeError("Call fit() before transform().")
    scores = to_numpy(y_prob, dtype=np.float64)
    if scores.ndim == 2:
        scores = scores[:, 1]
    calibrated = expit(self.A_ * scores + self.B_)
    return np.array(calibrated, dtype=np.float64)

reliably.recalibrate.isotonic.IsotonicCalibrator

Bases: Calibrator

Nonparametric monotone calibration via isotonic regression.

Wraps sklearn.isotonic.IsotonicRegression and requires the scikit-learn optional dependency.

Examples:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> y = rng.integers(0, 2, 300)
>>> s = rng.uniform(0, 1, 300)
>>> cal = IsotonicCalibrator().fit(s, y)
>>> probs = cal.transform(s)
>>> probs.shape == s.shape
True
Source code in src/reliably/recalibrate/isotonic.py
class IsotonicCalibrator(Calibrator):
    """Nonparametric monotone calibration via isotonic regression.

    Wraps ``sklearn.isotonic.IsotonicRegression`` and requires the
    ``scikit-learn`` optional dependency, which is imported lazily inside
    :meth:`fit`.

    Examples
    --------
    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y = rng.integers(0, 2, 300)
    >>> s = rng.uniform(0, 1, 300)
    >>> cal = IsotonicCalibrator().fit(s, y)
    >>> probs = cal.transform(s)
    >>> probs.shape == s.shape
    True
    """

    def fit(self, y_prob: Any, y_true: Any) -> IsotonicCalibrator:
        """Fit the isotonic regressor on the calibration split.

        Parameters
        ----------
        y_prob : array-like
            Scores, shape ``(N,)``.  For 2-D input, column 1 is used.
        y_true : array-like
            Binary labels.

        Returns
        -------
        IsotonicCalibrator
            ``self``, with the fitted regressor stored in ``_ir``.

        Raises
        ------
        ImportError
            If scikit-learn is not installed.
        """
        try:
            from sklearn.isotonic import IsotonicRegression  # type: ignore
        except ImportError as exc:
            message = (
                "scikit-learn is required for IsotonicCalibrator. "
                "Install with: pip install reliably[sklearn]"
            )
            raise ImportError(message) from exc

        scores = to_numpy(y_prob, dtype=np.float64)
        labels = to_numpy(y_true, dtype=np.float64)
        if scores.ndim == 2:
            scores = scores[:, 1]

        # Outputs are bounded to [0, 1]; out-of-range inputs are clipped.
        regressor = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
        self._ir = regressor
        regressor.fit(scores, labels)
        self._fitted = True
        return self

    def transform(self, y_prob: Any) -> NDArray[np.float64]:
        """Map scores through the fitted isotonic regressor.

        Parameters
        ----------
        y_prob : array-like
            Scores.  For 2-D input, column 1 is used.

        Returns
        -------
        NDArray[np.float64]
            Calibrated probabilities.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform().")
        scores = to_numpy(y_prob, dtype=np.float64)
        if scores.ndim == 2:
            scores = scores[:, 1]
        calibrated = self._ir.transform(scores)
        return np.array(calibrated, dtype=np.float64)

fit(y_prob, y_true)

Fit isotonic regression on calibration split.

Parameters:

Name Type Description Default
y_prob array-like

Scores, shape (N,).

required
y_true array-like

Binary labels.

required

Returns:

Type Description
IsotonicCalibrator
Source code in src/reliably/recalibrate/isotonic.py
def fit(self, y_prob: Any, y_true: Any) -> IsotonicCalibrator:
    """Fit the isotonic regressor on the calibration split.

    Parameters
    ----------
    y_prob : array-like
        Scores, shape ``(N,)``.  For 2-D input, column 1 is used.
    y_true : array-like
        Binary labels.

    Returns
    -------
    IsotonicCalibrator
        ``self``, with the fitted regressor stored in ``_ir``.

    Raises
    ------
    ImportError
        If scikit-learn is not installed.
    """
    try:
        from sklearn.isotonic import IsotonicRegression  # type: ignore
    except ImportError as exc:
        message = (
            "scikit-learn is required for IsotonicCalibrator. "
            "Install with: pip install reliably[sklearn]"
        )
        raise ImportError(message) from exc

    scores = to_numpy(y_prob, dtype=np.float64)
    labels = to_numpy(y_true, dtype=np.float64)
    if scores.ndim == 2:
        scores = scores[:, 1]

    # Outputs are bounded to [0, 1]; out-of-range inputs are clipped.
    regressor = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
    self._ir = regressor
    regressor.fit(scores, labels)
    self._fitted = True
    return self

transform(y_prob)

Apply isotonic calibration.

Parameters:

Name Type Description Default
y_prob array-like

Scores.

required

Returns:

Type Description
NDArray[float64]

Calibrated probabilities.

Source code in src/reliably/recalibrate/isotonic.py
def transform(self, y_prob: Any) -> NDArray[np.float64]:
    """Map scores through the fitted isotonic regressor.

    Parameters
    ----------
    y_prob : array-like
        Scores.  For 2-D input, column 1 is used.

    Returns
    -------
    NDArray[np.float64]
        Calibrated probabilities.

    Raises
    ------
    RuntimeError
        If called before ``fit()``.
    """
    if not self._fitted:
        raise RuntimeError("Call fit() before transform().")
    scores = to_numpy(y_prob, dtype=np.float64)
    if scores.ndim == 2:
        scores = scores[:, 1]
    calibrated = self._ir.transform(scores)
    return np.array(calibrated, dtype=np.float64)

reliably.recalibrate.beta.BetaCalibrator

Bases: Calibrator

Beta calibration: logit(p_cal) = c + a·log(s) − b·log(1 − s).

Parameters:

Name Type Description Default
constrain_ab bool

If True (default), constrain a, b ≥ 0.

True

Examples:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> y = rng.integers(0, 2, 300)
>>> s = rng.uniform(0.05, 0.95, 300)
>>> cal = BetaCalibrator().fit(s, y)
>>> probs = cal.transform(s)
>>> probs.shape == s.shape
True
Source code in src/reliably/recalibrate/beta.py
class BetaCalibrator(Calibrator):
    """Beta calibration: ``logit(p_cal) = c + a·log(s) − b·log(1 − s)``.

    The coefficients are fitted by minimizing the summed binary negative
    log-likelihood with L-BFGS-B, starting from ``a = 1, b = 1, c = 0``.

    Parameters
    ----------
    constrain_ab : bool
        If ``True`` (default), constrain ``a, b ≥ 0``.

    Examples
    --------
    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y = rng.integers(0, 2, 300)
    >>> s = rng.uniform(0.05, 0.95, 300)
    >>> cal = BetaCalibrator().fit(s, y)
    >>> probs = cal.transform(s)
    >>> probs.shape == s.shape
    True
    """

    a_: float
    b_: float
    c_: float

    def __init__(self, constrain_ab: bool = True) -> None:
        self.constrain_ab = constrain_ab

    def fit(self, y_prob: Any, y_true: Any) -> BetaCalibrator:
        """Estimate ``a_``, ``b_`` and ``c_`` on the calibration split.

        Parameters
        ----------
        y_prob : array-like
            Binary scores.  For 2-D input, column 1 is used.
        y_true : array-like
            Binary labels.

        Returns
        -------
        BetaCalibrator
            ``self``, with ``a_``, ``b_`` and ``c_`` set.
        """
        scores = to_numpy(y_prob, dtype=np.float64)
        labels = to_numpy(y_true, dtype=np.float64)
        if scores.ndim == 2:
            scores = scores[:, 1]

        # The two log features are fixed, so compute them once up front.
        safe_scores = clip_probs(scores)
        log_s = np.log(safe_scores)
        log_1ms = np.log(1.0 - safe_scores)

        def neg_log_lik(params: NDArray[np.float64]) -> float:
            """Summed binary NLL for ``params = (a, b, c)``."""
            a, b, c = params
            p_pos = expit(c + a * log_s - b * log_1ms)
            # Clip away from 0/1 so both logs stay finite.
            p_pos = np.clip(p_pos, 1e-12, 1.0 - 1e-12)
            log_lik = labels * np.log(p_pos) + (1.0 - labels) * np.log(1.0 - p_pos)
            return float(-np.sum(log_lik))

        if self.constrain_ab:
            # a and b are kept non-negative; c is unconstrained.
            bounds = [(0.0, None), (0.0, None), (None, None)]
        else:
            bounds = None
        optimum = minimize(
            neg_log_lik, [1.0, 1.0, 0.0], method="L-BFGS-B", bounds=bounds
        )
        self.a_ = float(optimum.x[0])
        self.b_ = float(optimum.x[1])
        self.c_ = float(optimum.x[2])
        self._fitted = True
        return self

    def transform(self, y_prob: Any) -> NDArray[np.float64]:
        """Apply the fitted beta-calibration map.

        Parameters
        ----------
        y_prob : array-like
            Binary scores.  For 2-D input, column 1 is used.

        Returns
        -------
        NDArray[np.float64]
            Calibrated probabilities.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform().")
        scores = to_numpy(y_prob, dtype=np.float64)
        if scores.ndim == 2:
            scores = scores[:, 1]
        safe_scores = clip_probs(scores)
        log_s = np.log(safe_scores)
        log_1ms = np.log(1.0 - safe_scores)
        logit_p = self.c_ + self.a_ * log_s - self.b_ * log_1ms
        return np.array(expit(logit_p), dtype=np.float64)

fit(y_prob, y_true)

Fit beta calibration on calibration split.

Parameters:

Name Type Description Default
y_prob array-like

Binary scores.

required
y_true array-like

Binary labels.

required

Returns:

Type Description
BetaCalibrator
Source code in src/reliably/recalibrate/beta.py
def fit(self, y_prob: Any, y_true: Any) -> BetaCalibrator:
    """Estimate ``a_``, ``b_`` and ``c_`` on the calibration split.

    Minimizes the summed binary negative log-likelihood of
    ``σ(c + a·log(s) − b·log(1 − s))`` with L-BFGS-B, starting from
    ``a = 1, b = 1, c = 0``.

    Parameters
    ----------
    y_prob : array-like
        Binary scores.  For 2-D input, column 1 is used.
    y_true : array-like
        Binary labels.

    Returns
    -------
    BetaCalibrator
        ``self``, with ``a_``, ``b_`` and ``c_`` set.
    """
    scores = to_numpy(y_prob, dtype=np.float64)
    labels = to_numpy(y_true, dtype=np.float64)
    if scores.ndim == 2:
        scores = scores[:, 1]

    # The two log features are fixed, so compute them once up front.
    safe_scores = clip_probs(scores)
    log_s = np.log(safe_scores)
    log_1ms = np.log(1.0 - safe_scores)

    def neg_log_lik(params: NDArray[np.float64]) -> float:
        """Summed binary NLL for ``params = (a, b, c)``."""
        a, b, c = params
        p_pos = expit(c + a * log_s - b * log_1ms)
        # Clip away from 0/1 so both logs stay finite.
        p_pos = np.clip(p_pos, 1e-12, 1.0 - 1e-12)
        log_lik = labels * np.log(p_pos) + (1.0 - labels) * np.log(1.0 - p_pos)
        return float(-np.sum(log_lik))

    if self.constrain_ab:
        # a and b are kept non-negative; c is unconstrained.
        bounds = [(0.0, None), (0.0, None), (None, None)]
    else:
        bounds = None
    optimum = minimize(
        neg_log_lik, [1.0, 1.0, 0.0], method="L-BFGS-B", bounds=bounds
    )
    self.a_ = float(optimum.x[0])
    self.b_ = float(optimum.x[1])
    self.c_ = float(optimum.x[2])
    self._fitted = True
    return self

transform(y_prob)

Apply beta calibration.

Parameters:

Name Type Description Default
y_prob array-like

Binary scores.

required

Returns:

Type Description
NDArray[float64]

Calibrated probabilities.

Source code in src/reliably/recalibrate/beta.py
def transform(self, y_prob: Any) -> NDArray[np.float64]:
    """Apply the fitted beta-calibration map.

    Computes ``σ(c_ + a_·log(s) − b_·log(1 − s))`` on the clipped scores.

    Parameters
    ----------
    y_prob : array-like
        Binary scores.  For 2-D input, column 1 is used.

    Returns
    -------
    NDArray[np.float64]
        Calibrated probabilities.

    Raises
    ------
    RuntimeError
        If called before ``fit()``.
    """
    if not self._fitted:
        raise RuntimeError("Call fit() before transform().")
    scores = to_numpy(y_prob, dtype=np.float64)
    if scores.ndim == 2:
        scores = scores[:, 1]
    safe_scores = clip_probs(scores)
    log_s = np.log(safe_scores)
    log_1ms = np.log(1.0 - safe_scores)
    logit_p = self.c_ + self.a_ * log_s - self.b_ * log_1ms
    return np.array(expit(logit_p), dtype=np.float64)

reliably.recalibrate.histogram.HistogramCalibrator

Bases: Calibrator

Replace each bin's score with its empirical accuracy on the calibration split.

Parameters:

Name Type Description Default
n_bins int

Number of histogram bins.

15
binning str

"equal_width" or "adaptive".

'adaptive'

Examples:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> y = rng.integers(0, 2, 300)
>>> s = rng.uniform(0, 1, 300)
>>> cal = HistogramCalibrator().fit(s, y)
>>> probs = cal.transform(s)
>>> probs.shape == s.shape
True
Source code in src/reliably/recalibrate/histogram.py
class HistogramCalibrator(Calibrator):
    """Replace each bin's score with its empirical accuracy on the calibration split.

    Parameters
    ----------
    n_bins : int
        Number of histogram bins.
    binning : str
        ``"equal_width"`` or ``"adaptive"``.  Any value other than
        ``"equal_width"`` selects adaptive binning.

    Attributes
    ----------
    edges_ : NDArray[np.float64]
        Bin edges, set by :meth:`fit`.
    bin_acc_ : NDArray[np.float64]
        Per-bin calibrated value, set by :meth:`fit`.

    Examples
    --------
    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y = rng.integers(0, 2, 300)
    >>> s = rng.uniform(0, 1, 300)
    >>> cal = HistogramCalibrator().fit(s, y)
    >>> probs = cal.transform(s)
    >>> probs.shape == s.shape
    True
    """

    edges_: NDArray[np.float64]
    bin_acc_: NDArray[np.float64]

    def __init__(self, n_bins: int = 15, binning: str = "adaptive") -> None:
        self.n_bins = n_bins
        self.binning = binning

    def fit(self, y_prob: Any, y_true: Any) -> HistogramCalibrator:
        """Fit histogram binning on calibration split.

        Parameters
        ----------
        y_prob : array-like
            Scores.  For 2-D input, column 1 is used.
        y_true : array-like
            Binary labels.

        Returns
        -------
        HistogramCalibrator
            ``self``, with ``edges_`` and ``bin_acc_`` set.
        """
        s = to_numpy(y_prob, dtype=np.float64)
        y = to_numpy(y_true, dtype=np.float64)
        if s.ndim == 2:
            s = s[:, 1]

        edges = (
            equal_width_bins(self.n_bins)
            if self.binning == "equal_width"
            else adaptive_bins(s, self.n_bins)
        )
        _, bin_acc, bin_n = bin_stats(s, y, edges)
        # Where a bin is empty, use the base rate
        base_rate = float(y.mean())
        bin_acc[bin_n == 0] = base_rate

        self.edges_ = edges
        self.bin_acc_ = bin_acc
        self._fitted = True
        return self

    def transform(self, y_prob: Any) -> NDArray[np.float64]:
        """Apply histogram calibration.

        Each score is replaced by the fitted value of the bin it falls in
        (bins are half-open ``[lo, hi)``, the last one closed).  Scores
        below the first edge or above the last edge are assigned to the
        first or last bin respectively, and NaN scores yield NaN, so every
        output element is well defined.

        Parameters
        ----------
        y_prob : array-like
            Scores.  For 2-D input, column 1 is used.

        Returns
        -------
        NDArray[np.float64]
            Calibrated probabilities.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform().")
        s = to_numpy(y_prob, dtype=np.float64)
        if s.ndim == 2:
            s = s[:, 1]

        edges = np.asarray(self.edges_, dtype=np.float64)
        bin_acc = np.asarray(self.bin_acc_, dtype=np.float64)
        n_bins = len(edges) - 1
        # Index of the last edge <= s, i.e. the bin [edges[i], edges[i+1]).
        # Assumes edges_ is non-decreasing, which searchsorted requires.
        idx = np.searchsorted(edges, s, side="right") - 1
        # Clamp so out-of-range scores (and s == edges[-1]) land in an end
        # bin instead of being left unassigned.
        idx = np.clip(idx, 0, n_bins - 1)
        return np.where(np.isnan(s), np.nan, bin_acc[idx])

fit(y_prob, y_true)

Fit histogram binning on calibration split.

Parameters:

Name Type Description Default
y_prob array-like

Scores.

required
y_true array-like

Binary labels.

required

Returns:

Type Description
HistogramCalibrator
Source code in src/reliably/recalibrate/histogram.py
def fit(self, y_prob: Any, y_true: Any) -> HistogramCalibrator:
    """Compute bin edges and per-bin values from the calibration split.

    Parameters
    ----------
    y_prob : array-like
        Scores.  For 2-D input, column 1 is used.
    y_true : array-like
        Binary labels.

    Returns
    -------
    HistogramCalibrator
        ``self``, with ``edges_`` and ``bin_acc_`` set.
    """
    scores = to_numpy(y_prob, dtype=np.float64)
    labels = to_numpy(y_true, dtype=np.float64)
    if scores.ndim == 2:
        scores = scores[:, 1]

    # Anything other than "equal_width" falls back to adaptive binning.
    if self.binning == "equal_width":
        edges = equal_width_bins(self.n_bins)
    else:
        edges = adaptive_bins(scores, self.n_bins)

    _, accuracy, counts = bin_stats(scores, labels, edges)
    # Empty bins have no data of their own; use the overall label mean.
    overall_rate = float(labels.mean())
    accuracy[counts == 0] = overall_rate

    self.edges_ = edges
    self.bin_acc_ = accuracy
    self._fitted = True
    return self

transform(y_prob)

Apply histogram calibration.

Parameters:

Name Type Description Default
y_prob array-like

Scores.

required

Returns:

Type Description
NDArray[float64]

Calibrated probabilities.

Source code in src/reliably/recalibrate/histogram.py
def transform(self, y_prob: Any) -> NDArray[np.float64]:
    """Apply histogram calibration.

    Each score is replaced by the fitted value of the bin it falls in
    (bins are half-open ``[lo, hi)``, the last one closed).  Scores below
    the first edge or above the last edge are assigned to the first or
    last bin respectively, and NaN scores yield NaN, so every output
    element is well defined.

    Parameters
    ----------
    y_prob : array-like
        Scores.  For 2-D input, column 1 is used.

    Returns
    -------
    NDArray[np.float64]
        Calibrated probabilities.

    Raises
    ------
    RuntimeError
        If called before ``fit()``.
    """
    if not self._fitted:
        raise RuntimeError("Call fit() before transform().")
    s = to_numpy(y_prob, dtype=np.float64)
    if s.ndim == 2:
        s = s[:, 1]

    edges = np.asarray(self.edges_, dtype=np.float64)
    bin_acc = np.asarray(self.bin_acc_, dtype=np.float64)
    n_bins = len(edges) - 1
    # Index of the last edge <= s, i.e. the bin [edges[i], edges[i+1]).
    # Assumes edges_ is non-decreasing, which searchsorted requires.
    idx = np.searchsorted(edges, s, side="right") - 1
    # Clamp so out-of-range scores (and s == edges[-1]) land in an end
    # bin instead of being left unassigned.
    idx = np.clip(idx, 0, n_bins - 1)
    return np.where(np.isnan(s), np.nan, bin_acc[idx])

reliably.recalibrate.matrix.VectorScaler

Bases: Calibrator

Per-class temperature scaling: p_cal = softmax(w ⊙ logits + b).

More expressive than scalar temperature but less prone to overfitting than full matrix scaling.

Examples:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> y = rng.integers(0, 3, 300)
>>> p = rng.dirichlet([1, 1, 1], 300)
>>> cal = VectorScaler().fit(p, y)
>>> probs = cal.transform(p)
>>> np.allclose(probs.sum(axis=1), 1.0, atol=1e-6)
True
Source code in src/reliably/recalibrate/matrix.py
class VectorScaler(Calibrator):
    """Per-class temperature scaling: ``p_cal = softmax(w ⊙ logits + b)``.

    More expressive than scalar temperature but less prone to overfitting
    than full matrix scaling.  The per-class weights ``W_`` and biases
    ``b_`` are fitted by minimizing the mean negative log-likelihood with
    L-BFGS-B, starting from ``w = 1, b = 0``.

    Examples
    --------
    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y = rng.integers(0, 3, 300)
    >>> p = rng.dirichlet([1, 1, 1], 300)
    >>> cal = VectorScaler().fit(p, y)
    >>> probs = cal.transform(p)
    >>> np.allclose(probs.sum(axis=1), 1.0, atol=1e-6)
    True
    """

    W_: NDArray[np.float64]
    b_: NDArray[np.float64]

    def fit(self, y_prob: Any, y_true: Any) -> VectorScaler:
        """Estimate the per-class weights and biases.

        Parameters
        ----------
        y_prob : array-like
            Probabilities ``(N, K)``; 1-D input is treated as binary.
        y_true : array-like
            Integer labels.

        Returns
        -------
        VectorScaler
            ``self``, with ``W_`` and ``b_`` set.
        """
        probs_in = to_numpy(y_prob, dtype=np.float64)
        labels = to_numpy(y_true, dtype=np.float64).astype(np.int64)
        n = len(labels)

        if probs_in.ndim == 2:
            k = probs_in.shape[1]
        else:
            k = 2
        if probs_in.ndim == 1:
            # Binary scores become the two columns [1 - p, p].
            probs_in = np.stack([1.0 - probs_in, probs_in], axis=1)

        logits = np.log(clip_probs(probs_in))
        rows = np.arange(n)

        def neg_nll(params: NDArray[np.float64]) -> float:
            """Mean NLL for ``params = [weights (k), biases (k)]``."""
            scale, shift = params[:k], params[k:]
            scaled = logits * scale[None, :] + shift[None, :]
            p_true = clip_probs(softmax(scaled)[rows, labels])
            return float(-np.log(p_true).mean())

        start = np.concatenate([np.ones(k), np.zeros(k)])
        optimum = minimize(neg_nll, start, method="L-BFGS-B")
        self.W_ = optimum.x[:k]
        self.b_ = optimum.x[k:]
        self._fitted = True
        return self

    def transform(self, y_prob: Any) -> NDArray[np.float64]:
        """Apply the fitted per-class scaling.

        Parameters
        ----------
        y_prob : array-like
            Probabilities, 2-D or 1-D (binary).

        Returns
        -------
        NDArray[np.float64]
            Calibrated probabilities.  For 1-D input only the second
            (positive-class) column is returned.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform().")
        probs_in = to_numpy(y_prob, dtype=np.float64)
        is_binary = probs_in.ndim == 1
        if is_binary:
            probs_in = np.stack([1.0 - probs_in, probs_in], axis=1)
        logits = np.log(clip_probs(probs_in))
        calibrated = softmax(logits * self.W_[None, :] + self.b_[None, :])
        return calibrated[:, 1] if is_binary else calibrated

fit(y_prob, y_true)

Fit per-class vector scaling.

Parameters:

Name Type Description Default
y_prob array-like

Probabilities (N, K).

required
y_true array-like

Integer labels.

required

Returns:

Type Description
VectorScaler
Source code in src/reliably/recalibrate/matrix.py
def fit(self, y_prob: Any, y_true: Any) -> VectorScaler:
    """Estimate the per-class weights and biases.

    Minimizes the mean negative log-likelihood of
    ``softmax(w ⊙ logits + b)`` with L-BFGS-B, starting from
    ``w = 1, b = 0``.

    Parameters
    ----------
    y_prob : array-like
        Probabilities ``(N, K)``; 1-D input is treated as binary.
    y_true : array-like
        Integer labels.

    Returns
    -------
    VectorScaler
        ``self``, with ``W_`` and ``b_`` set.
    """
    probs_in = to_numpy(y_prob, dtype=np.float64)
    labels = to_numpy(y_true, dtype=np.float64).astype(np.int64)
    n = len(labels)

    if probs_in.ndim == 2:
        k = probs_in.shape[1]
    else:
        k = 2
    if probs_in.ndim == 1:
        # Binary scores become the two columns [1 - p, p].
        probs_in = np.stack([1.0 - probs_in, probs_in], axis=1)

    logits = np.log(clip_probs(probs_in))
    rows = np.arange(n)

    def neg_nll(params: NDArray[np.float64]) -> float:
        """Mean NLL for ``params = [weights (k), biases (k)]``."""
        scale, shift = params[:k], params[k:]
        scaled = logits * scale[None, :] + shift[None, :]
        p_true = clip_probs(softmax(scaled)[rows, labels])
        return float(-np.log(p_true).mean())

    start = np.concatenate([np.ones(k), np.zeros(k)])
    optimum = minimize(neg_nll, start, method="L-BFGS-B")
    self.W_ = optimum.x[:k]
    self.b_ = optimum.x[k:]
    self._fitted = True
    return self

transform(y_prob)

Apply vector scaling.

Parameters:

Name Type Description Default
y_prob array-like

Probabilities.

required

Returns:

Type Description
NDArray[float64]

Calibrated probabilities.

Source code in src/reliably/recalibrate/matrix.py
def transform(self, y_prob: Any) -> NDArray[np.float64]:
    """Apply the fitted per-class scaling ``softmax(W_ ⊙ logits + b_)``.

    Parameters
    ----------
    y_prob : array-like
        Probabilities, 2-D or 1-D (binary).

    Returns
    -------
    NDArray[np.float64]
        Calibrated probabilities.  For 1-D input only the second
        (positive-class) column is returned.

    Raises
    ------
    RuntimeError
        If called before ``fit()``.
    """
    if not self._fitted:
        raise RuntimeError("Call fit() before transform().")
    probs_in = to_numpy(y_prob, dtype=np.float64)
    is_binary = probs_in.ndim == 1
    if is_binary:
        # Binary scores become the two columns [1 - p, p].
        probs_in = np.stack([1.0 - probs_in, probs_in], axis=1)
    logits = np.log(clip_probs(probs_in))
    calibrated = softmax(logits * self.W_[None, :] + self.b_[None, :])
    return calibrated[:, 1] if is_binary else calibrated

reliably.recalibrate.matrix.MatrixScaler

Bases: Calibrator

Full K×K affine map on logits: p_cal = softmax(W·logits + b).

More expressive; gate behind method="matrix".

Examples:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> y = rng.integers(0, 3, 300)
>>> p = rng.dirichlet([1, 1, 1], 300)
>>> cal = MatrixScaler().fit(p, y)
>>> probs = cal.transform(p)
>>> np.allclose(probs.sum(axis=1), 1.0, atol=1e-6)
True
Source code in src/reliably/recalibrate/matrix.py
class MatrixScaler(Calibrator):
    """Full K×K affine map on logits: ``p_cal = softmax(W·logits + b)``.

    The logits are the log of the (clipped) input probabilities.  ``W`` is
    a dense ``K×K`` matrix and ``b`` a length-``K`` bias, so the model has
    ``K*K + K`` free parameters.

    More expressive; gate behind ``method="matrix"``.

    Examples
    --------
    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y = rng.integers(0, 3, 300)
    >>> p = rng.dirichlet([1, 1, 1], 300)
    >>> cal = MatrixScaler().fit(p, y)
    >>> probs = cal.transform(p)
    >>> np.allclose(probs.sum(axis=1), 1.0, atol=1e-6)
    True
    """

    # Fitted K×K weight matrix (set by ``fit``).
    W_: NDArray[np.float64]
    # Fitted length-K bias vector (set by ``fit``).
    b_: NDArray[np.float64]

    def fit(self, y_prob: Any, y_true: Any) -> MatrixScaler:
        """Fit the weight matrix and bias by minimising mean NLL.

        The parameters are optimised with ``minimize`` using the
        ``"L-BFGS-B"`` method, starting from the identity matrix and a
        zero bias.

        Parameters
        ----------
        y_prob : array-like
            Probabilities ``(N, K)``.  A 1-D input ``p`` is expanded to
            the two columns ``[1 - p, p]``.
        y_true : array-like
            Integer labels.

        Returns
        -------
        MatrixScaler
            This instance, with ``W_`` and ``b_`` set.
        """
        probs = to_numpy(y_prob, dtype=np.float64)
        labels = to_numpy(y_true, dtype=np.float64).astype(np.int64)
        row_index = np.arange(len(labels))
        n_classes = 2 if probs.ndim != 2 else probs.shape[1]
        n_weights = n_classes * n_classes

        if probs.ndim == 1:
            probs = np.stack([1.0 - probs, probs], axis=1)

        log_probs = np.log(clip_probs(probs))

        def unpack(theta: NDArray[np.float64]) -> tuple[Any, Any]:
            # Flat layout: first K*K entries are W (row-major), the rest is b.
            return (
                theta[:n_weights].reshape(n_classes, n_classes),
                theta[n_weights:],
            )

        def mean_nll(theta: NDArray[np.float64]) -> float:
            weights, bias = unpack(theta)
            scores = log_probs @ weights.T + bias[np.newaxis, :]
            predicted = softmax(scores)
            # Probability assigned to the true label of each row.
            picked = clip_probs(predicted[row_index, labels])
            return float(-np.log(picked).mean())

        start = np.concatenate((np.eye(n_classes).ravel(), np.zeros(n_classes)))
        result = minimize(mean_nll, start, method="L-BFGS-B")
        self.W_, self.b_ = unpack(result.x)
        self._fitted = True
        return self

    def transform(self, y_prob: Any) -> NDArray[np.float64]:
        """Apply the fitted matrix scaling to new probabilities.

        Parameters
        ----------
        y_prob : array-like
            Probabilities.  A 1-D input ``p`` is expanded to the two
            columns ``[1 - p, p]`` before scaling.

        Returns
        -------
        NDArray[np.float64]
            Calibrated probabilities.  For 1-D input only the second
            calibrated column is returned.

        Raises
        ------
        RuntimeError
            If the calibrator has not been fitted yet.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform().")

        probs = to_numpy(y_prob, dtype=np.float64)
        is_binary = probs.ndim == 1
        if is_binary:
            probs = np.stack([1.0 - probs, probs], axis=1)

        log_probs = np.log(clip_probs(probs))
        scores = log_probs @ self.W_.T + self.b_[np.newaxis, :]
        calibrated = softmax(scores)
        return calibrated[:, 1] if is_binary else calibrated

fit(y_prob, y_true)

Fit full matrix scaling.

Parameters:

Name Type Description Default
y_prob array-like

Probabilities (N, K).

required
y_true array-like

Integer labels.

required

Returns:

Type Description
MatrixScaler
Source code in src/reliably/recalibrate/matrix.py
def fit(self, y_prob: Any, y_true: Any) -> MatrixScaler:
    """Fit the weight matrix and bias by minimising mean NLL.

    The parameters are optimised with ``minimize`` using the
    ``"L-BFGS-B"`` method, starting from the identity matrix and a
    zero bias.

    Parameters
    ----------
    y_prob : array-like
        Probabilities ``(N, K)``.  A 1-D input ``p`` is expanded to
        the two columns ``[1 - p, p]``.
    y_true : array-like
        Integer labels.

    Returns
    -------
    MatrixScaler
        This instance, with ``W_`` and ``b_`` set.
    """
    probs = to_numpy(y_prob, dtype=np.float64)
    labels = to_numpy(y_true, dtype=np.float64).astype(np.int64)
    row_index = np.arange(len(labels))
    n_classes = 2 if probs.ndim != 2 else probs.shape[1]
    n_weights = n_classes * n_classes

    if probs.ndim == 1:
        probs = np.stack([1.0 - probs, probs], axis=1)

    log_probs = np.log(clip_probs(probs))

    def unpack(theta: NDArray[np.float64]) -> tuple[Any, Any]:
        # Flat layout: first K*K entries are W (row-major), the rest is b.
        return (
            theta[:n_weights].reshape(n_classes, n_classes),
            theta[n_weights:],
        )

    def mean_nll(theta: NDArray[np.float64]) -> float:
        weights, bias = unpack(theta)
        scores = log_probs @ weights.T + bias[np.newaxis, :]
        predicted = softmax(scores)
        # Probability assigned to the true label of each row.
        picked = clip_probs(predicted[row_index, labels])
        return float(-np.log(picked).mean())

    start = np.concatenate((np.eye(n_classes).ravel(), np.zeros(n_classes)))
    result = minimize(mean_nll, start, method="L-BFGS-B")
    self.W_, self.b_ = unpack(result.x)
    self._fitted = True
    return self

transform(y_prob)

Apply matrix scaling.

Parameters:

Name Type Description Default
y_prob array-like

Probabilities.

required

Returns:

Type Description
NDArray[float64]

Calibrated probabilities.

Source code in src/reliably/recalibrate/matrix.py
def transform(self, y_prob: Any) -> NDArray[np.float64]:
    """Apply the fitted matrix scaling to new probabilities.

    Parameters
    ----------
    y_prob : array-like
        Probabilities.  A 1-D input ``p`` is expanded to the two
        columns ``[1 - p, p]`` before scaling.

    Returns
    -------
    NDArray[np.float64]
        Calibrated probabilities.  For 1-D input only the second
        calibrated column is returned.

    Raises
    ------
    RuntimeError
        If the calibrator has not been fitted yet.
    """
    if not self._fitted:
        raise RuntimeError("Call fit() before transform().")

    probs = to_numpy(y_prob, dtype=np.float64)
    is_binary = probs.ndim == 1
    if is_binary:
        probs = np.stack([1.0 - probs, probs], axis=1)

    log_probs = np.log(clip_probs(probs))
    scores = log_probs @ self.W_.T + self.b_[np.newaxis, :]
    calibrated = softmax(scores)
    return calibrated[:, 1] if is_binary else calibrated