REVERT ENH add the parameter prefit in the FixedThresholdClassifier (#…
adrinjalali authored Nov 5, 2024
1 parent 004cf9e commit 70aab36
Showing 7 changed files with 165 additions and 68 deletions.
4 changes: 3 additions & 1 deletion doc/modules/classification_threshold.rst
@@ -144,7 +144,9 @@ Manually setting the decision threshold
The previous sections discussed strategies to find an optimal decision threshold. It is
also possible to manually set the decision threshold using the class
:class:`~sklearn.model_selection.FixedThresholdClassifier`. In case that you don't want
to refit the model when calling `fit`, you can set the parameter `prefit=True`.
to refit the model when calling `fit`, wrap your sub-estimator with a
:class:`~sklearn.frozen.FrozenEstimator` and do
``FixedThresholdClassifier(FrozenEstimator(estimator), ...)``.
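
For reference, here is a minimal sketch of the pattern described above (the variable
names and the threshold value are illustrative only, and assume a scikit-learn version
that provides :class:`~sklearn.frozen.FrozenEstimator`):

from sklearn.datasets import make_classification
from sklearn.frozen import FrozenEstimator
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import FixedThresholdClassifier

X, y = make_classification(random_state=0)
clf = LogisticRegression().fit(X, y)  # already fitted

# FrozenEstimator turns `fit` into a no-op on the wrapped classifier, so calling
# `fit` on the meta-estimator does not refit `clf`.
fixed = FixedThresholdClassifier(FrozenEstimator(clf), threshold=0.8).fit(X, y)
print(fixed.predict(X[:5]))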

Examples
--------

This file was deleted.

@@ -0,0 +1,4 @@
- There is no need to call `fit` on a
  :class:`~sklearn.model_selection.FixedThresholdClassifier` if the underlying
  estimator is already fitted.
  By :user:`Adrin Jalali <adrinjalali>`
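
As a rough illustration of this entry (hypothetical variable names, mirroring the test
added further down in this commit), the meta-estimator can be used for prediction
without any call to `fit` when the wrapped classifier is already fitted:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import FixedThresholdClassifier

X, y = make_classification(random_state=0)
fitted_clf = LogisticRegression().fit(X, y)

# No `fit` call on the meta-estimator: prediction delegates to `fitted_clf` directly.
print(FixedThresholdClassifier(estimator=fitted_clf).predict(X[:5]))
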
98 changes: 98 additions & 0 deletions examples/frozen/plot_frozen_examples.py
@@ -0,0 +1,98 @@
"""
===================================
Examples of Using `FrozenEstimator`
===================================

This example showcases some use cases of :class:`~sklearn.frozen.FrozenEstimator`.

:class:`~sklearn.frozen.FrozenEstimator` is a utility class that allows freezing a
fitted estimator. This is useful, for instance, when we want to pass a fitted estimator
to a meta-estimator, such as :class:`~sklearn.model_selection.FixedThresholdClassifier`,
without letting the meta-estimator refit the estimator.
"""

# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

# %%
# Setting a decision threshold for a pre-fitted classifier
# --------------------------------------------------------
# Fitted classifiers in scikit-learn use an arbitrary decision threshold to decide
# which class the given sample belongs to. The decision threshold is either `0.0` on the
# value returned by :term:`decision_function`, or `0.5` on the probability returned by
# :term:`predict_proba`.
#
# However, one might want to set a custom decision threshold. We can do this by
# using :class:`~sklearn.model_selection.FixedThresholdClassifier` and wrapping the
# classifier with :class:`~sklearn.frozen.FrozenEstimator`.
from sklearn.datasets import make_classification
from sklearn.frozen import FrozenEstimator
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import FixedThresholdClassifier, train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
classifier = LogisticRegression().fit(X_train, y_train)

print(
    "Probability estimates for three data points:\n"
    f"{classifier.predict_proba(X_test[-3:]).round(3)}"
)
print(
    "Predicted class for the same three data points:\n"
    f"{classifier.predict(X_test[-3:])}"
)
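
# %%
# As a quick sanity check (illustrative, not part of the original example), the default
# `predict` behaviour of this classifier corresponds to thresholding the positive-class
# probability at 0.5.
import numpy as np

manual_predictions = (classifier.predict_proba(X_test)[:, 1] >= 0.5).astype(int)
print(
    "Manual 0.5 threshold matches `predict`: "
    f"{np.array_equal(manual_predictions, classifier.predict(X_test))}"
)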

# %%
# Now imagine you'd want to set a different decision threshold on the probability
# estimates. We can do this by wrapping the classifier with
# :class:`~sklearn.frozen.FrozenEstimator` and passing it to
# :class:`~sklearn.model_selection.FixedThresholdClassifier`.

threshold_classifier = FixedThresholdClassifier(
    estimator=FrozenEstimator(classifier), threshold=0.9
)

# %%
# Note that in the above piece of code we do not need to call `fit` on the
# :class:`~sklearn.model_selection.FixedThresholdClassifier`: the underlying classifier
# is already fitted, and since it is wrapped in :class:`~sklearn.frozen.FrozenEstimator`
# it would not be refitted even if `fit` were called.
#
# Now, let's see how the predictions changed with respect to the probability
# threshold.
print(
    "Probability estimates for three data points with FixedThresholdClassifier:\n"
    f"{threshold_classifier.predict_proba(X_test[-3:]).round(3)}"
)
print(
    "Predicted class for the same three data points with FixedThresholdClassifier:\n"
    f"{threshold_classifier.predict(X_test[-3:])}"
)

# %%
# We see that the probability estimates stay the same, but since a different decision
# threshold is used, the predicted classes are different.
#
# Please refer to
# :ref:`sphx_glr_auto_examples_model_selection_plot_cost_sensitive_learning.py`
# to learn about cost-sensitive learning and decision threshold tuning.
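
# %%
# To make the effect of the 0.9 threshold concrete (an illustrative check, not part of
# the original example), we can count how many test samples reach a positive-class
# probability of at least 0.9 and compare with the number of samples predicted as the
# positive class.
import numpy as np

proba_positive = threshold_classifier.predict_proba(X_test)[:, 1]
print("Samples with P(class=1) >= 0.9:", int(np.sum(proba_positive >= 0.9)))
print(
    "Samples predicted as class 1 by FixedThresholdClassifier:",
    int(np.sum(threshold_classifier.predict(X_test) == 1)),
)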

# %%
# Calibration of a pre-fitted classifier
# --------------------------------------
# You can use :class:`~sklearn.frozen.FrozenEstimator` to calibrate a pre-fitted
# classifier using :class:`~sklearn.calibration.CalibratedClassifierCV`.
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import brier_score_loss

calibrated_classifier = CalibratedClassifierCV(
    estimator=FrozenEstimator(classifier)
).fit(X_train, y_train)

prob_pos_clf = classifier.predict_proba(X_test)[:, 1]
clf_score = brier_score_loss(y_test, prob_pos_clf)
print(f"No calibration: {clf_score:.3f}")

prob_pos_calibrated = calibrated_classifier.predict_proba(X_test)[:, 1]
calibrated_score = brier_score_loss(y_test, prob_pos_calibrated)
print(f"With calibration: {calibrated_score:.3f}")
9 changes: 6 additions & 3 deletions examples/model_selection/plot_cost_sensitive_learning.py
@@ -660,15 +660,18 @@ def business_metric(y_true, y_pred, amount):
#
# The class :class:`~sklearn.model_selection.FixedThresholdClassifier` allows us to
# manually set the decision threshold. At prediction time, it behaves as the previous
# tuned model but no search is performed during the fitting process.
# tuned model but no search is performed during the fitting process. Note that here
# we use :class:`~sklearn.frozen.FrozenEstimator` to wrap the predictive model to
# avoid any refitting.
#
# Here, we will reuse the decision threshold found in the previous section to create a
# new model and check that it gives the same results.
from sklearn.frozen import FrozenEstimator
from sklearn.model_selection import FixedThresholdClassifier

model_fixed_threshold = FixedThresholdClassifier(
    estimator=model, threshold=tuned_model.best_threshold_, prefit=True
).fit(data_train, target_train)
    estimator=FrozenEstimator(model), threshold=tuned_model.best_threshold_
)

# %%
business_score = business_scorer(
59 changes: 34 additions & 25 deletions sklearn/model_selection/_classification_threshold.py
@@ -43,6 +43,13 @@
from ._split import StratifiedShuffleSplit, check_cv


def _check_is_fitted(estimator):
    """Check that either the wrapped estimator or the meta-estimator is fitted."""
    try:
        check_is_fitted(estimator.estimator)
    except NotFittedError:
        check_is_fitted(estimator, "estimator_")


def _estimator_has(attr):
"""Check if we can delegate a method to the underlying estimator.
@@ -170,8 +177,9 @@ def predict_proba(self, X):
        probabilities : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples.
        """
        check_is_fitted(self, "estimator_")
        return self.estimator_.predict_proba(X)
        _check_is_fitted(self)
        estimator = getattr(self, "estimator_", self.estimator)
        return estimator.predict_proba(X)

    @available_if(_estimator_has("predict_log_proba"))
    def predict_log_proba(self, X):
@@ -188,8 +196,9 @@ def predict_log_proba(self, X):
        log_probabilities : ndarray of shape (n_samples, n_classes)
            The logarithm class probabilities of the input samples.
        """
        check_is_fitted(self, "estimator_")
        return self.estimator_.predict_log_proba(X)
        _check_is_fitted(self)
        estimator = getattr(self, "estimator_", self.estimator)
        return estimator.predict_log_proba(X)

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
@@ -206,8 +215,9 @@ def decision_function(self, X):
        decisions : ndarray of shape (n_samples,)
            The decision function computed by the fitted estimator.
        """
        check_is_fitted(self, "estimator_")
        return self.estimator_.decision_function(X)
        _check_is_fitted(self)
        estimator = getattr(self, "estimator_", self.estimator)
        return estimator.decision_function(X)

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
@@ -264,13 +274,6 @@ class FixedThresholdClassifier(BaseThresholdClassifier):
        If the method is not implemented by the classifier, it will raise an
        error.

    prefit : bool, default=False
        Whether a pre-fitted model is expected to be passed into the constructor
        directly or not. If `True`, `estimator` must be a fitted estimator. If `False`,
        `estimator` is fitted and updated by calling `fit`.

        .. versionadded:: 1.6

    Attributes
    ----------
    estimator_ : estimator instance
@@ -322,7 +325,6 @@ class FixedThresholdClassifier(BaseThresholdClassifier):
        **BaseThresholdClassifier._parameter_constraints,
        "threshold": [StrOptions({"auto"}), Real],
        "pos_label": [Real, str, "boolean", None],
        "prefit": ["boolean"],
    }

    def __init__(
@@ -332,12 +334,22 @@ def __init__(
        threshold="auto",
        pos_label=None,
        response_method="auto",
        prefit=False,
    ):
        super().__init__(estimator=estimator, response_method=response_method)
        self.pos_label = pos_label
        self.threshold = threshold
        self.prefit = prefit

    @property
    def classes_(self):
        if estimator := getattr(self, "estimator_", None):
            return estimator.classes_
        try:
            check_is_fitted(self.estimator)
            return self.estimator.classes_
        except NotFittedError:
            raise AttributeError(
                "The underlying estimator is not fitted yet."
            ) from NotFittedError

    def _fit(self, X, y, **params):
        """Fit the classifier.
@@ -360,13 +372,7 @@ def _fit(self, X, y, **params):
            Returns an instance of self.
        """
        routed_params = process_routing(self, "fit", **params)
        if self.prefit:
            check_is_fitted(self.estimator)
            self.estimator_ = self.estimator
        else:
            self.estimator_ = clone(self.estimator).fit(
                X, y, **routed_params.estimator.fit
            )
        self.estimator_ = clone(self.estimator).fit(X, y, **routed_params.estimator.fit)
        return self

    def predict(self, X):
@@ -382,9 +388,12 @@ def predict(self, X):
        class_labels : ndarray of shape (n_samples,)
            The predicted class.
        """
        check_is_fitted(self, "estimator_")
        _check_is_fitted(self)

        estimator = getattr(self, "estimator_", self.estimator)

        y_score, _, response_method_used = _get_response_values_binary(
            self.estimator_,
            estimator,
            X,
            self._get_response_method(),
            pos_label=self.pos_label,
55 changes: 20 additions & 35 deletions sklearn/model_selection/tests/test_classification_threshold.py
@@ -2,7 +2,7 @@
import pytest

from sklearn import config_context
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.base import clone
from sklearn.datasets import (
    load_breast_cancer,
    load_iris,
@@ -593,41 +593,26 @@ def test_fixed_threshold_classifier_metadata_routing():
    assert_allclose(classifier_default_threshold.estimator_.coef_, classifier.coef_)


class ClassifierLoggingFit(ClassifierMixin, BaseEstimator):
    """Classifier that logs the number of `fit` calls."""

    def __init__(self, fit_calls=0):
        self.fit_calls = fit_calls

    def fit(self, X, y, **fit_params):
        self.fit_calls += 1
        self.is_fitted_ = True
        return self

    def predict_proba(self, X):
        return np.ones((X.shape[0], 2), np.float64)  # pragma: nocover


def test_fixed_threshold_classifier_prefit():
    """Check the behaviour of the `FixedThresholdClassifier` with the `prefit`
    parameter."""
@pytest.mark.parametrize(
    "method", ["predict_proba", "decision_function", "predict", "predict_log_proba"]
)
def test_fixed_threshold_classifier_fitted_estimator(method):
    """Check that if the underlying estimator is already fitted, no fit is required."""
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    fixed_threshold_classifier = FixedThresholdClassifier(estimator=classifier)
    # This should not raise an error
    getattr(fixed_threshold_classifier, method)(X)

    estimator = ClassifierLoggingFit()
    model = FixedThresholdClassifier(estimator=estimator, prefit=True)
    with pytest.raises(NotFittedError):
        model.fit(X, y)

    # check that we don't clone the classifier when `prefit=True`.
    estimator.fit(X, y)
    model.fit(X, y)
    assert estimator.fit_calls == 1
    assert model.estimator_ is estimator
def test_fixed_threshold_classifier_classes_():
    """Check that the classes_ attribute is properly set."""
    X, y = make_classification(random_state=0)
    with pytest.raises(
        AttributeError, match="The underlying estimator is not fitted yet."
    ):
        FixedThresholdClassifier(estimator=LogisticRegression()).classes_

    # check that we clone the classifier when `prefit=False`.
    estimator = ClassifierLoggingFit()
    model = FixedThresholdClassifier(estimator=estimator, prefit=False)
    model.fit(X, y)
    assert estimator.fit_calls == 0
    assert model.estimator_.fit_calls == 1
    assert model.estimator_ is not estimator
    classifier = LogisticRegression().fit(X, y)
    fixed_threshold_classifier = FixedThresholdClassifier(estimator=classifier)
    assert_array_equal(fixed_threshold_classifier.classes_, classifier.classes_)
