FIX fixes memory leak seen in PyPy in Cython losses (#27670)
Co-authored-by: Tim Head <[email protected]>
glemaitre and betatim authored Oct 31, 2023
1 parent 25b71d2 commit a5fed0d
Showing 4 changed files with 48 additions and 77 deletions.
3 changes: 3 additions & 0 deletions doc/whats_new/v1.4.rst
@@ -35,6 +35,9 @@ random sampling procedures.
solvers (when fit on the same data again). The amount of change depends on the
specified `tol`, for small values you will get more precise results.

- |Fix| fixes a memory leak seen in PyPy for estimators using the Cython loss functions.
:pr:`27670` by :user:`Guillaume Lemaitre <glemaitre>`.

Changes impacting all modules
-----------------------------

57 changes: 11 additions & 46 deletions sklearn/_loss/_loss.pyx.tp
@@ -870,7 +870,9 @@ cdef class CyLossFunction:
floating_out[::1] loss_out, # OUT
int n_threads=1
):
"""Compute the pointwise loss value for each input.
"""Compute the point-wise loss value for each input.

The point-wise loss is written to `loss_out` and no array is returned.

Parameters
----------
@@ -884,11 +886,6 @@
A location into which the result is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
loss : array of shape (n_samples,)
Element-wise loss function.
"""
pass

@@ -902,6 +899,8 @@
):
"""Compute gradient of loss w.r.t raw_prediction for each input.

The gradient is written to `gradient_out` and no array is returned.

Parameters
----------
y_true : array of shape (n_samples,)
@@ -914,11 +913,6 @@
A location into which the result is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
gradient : array of shape (n_samples,)
Element-wise gradients.
"""
pass

@@ -933,6 +927,9 @@
):
"""Compute loss and gradient of loss w.r.t raw_prediction.

The loss and gradient are written to `loss_out` and `gradient_out` and no arrays
are returned.

Parameters
----------
y_true : array of shape (n_samples,)
@@ -947,18 +944,9 @@
A location into which the gradient is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
loss : array of shape (n_samples,)
Element-wise loss function.

gradient : array of shape (n_samples,)
Element-wise gradients.
"""
self.loss(y_true, raw_prediction, sample_weight, loss_out, n_threads)
self.gradient(y_true, raw_prediction, sample_weight, gradient_out, n_threads)
return np.asarray(loss_out), np.asarray(gradient_out)

def gradient_hessian(
self,
@@ -971,6 +959,9 @@
):
"""Compute gradient and hessian of loss w.r.t raw_prediction.

The gradient and hessian are written to `gradient_out` and `hessian_out` and no
arrays are returned.

Parameters
----------
y_true : array of shape (n_samples,)
@@ -985,14 +976,6 @@
A location into which the hessian is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
gradient : array of shape (n_samples,)
Element-wise gradients.

hessian : array of shape (n_samples,)
Element-wise hessians.
"""
pass

@@ -1045,8 +1028,6 @@ cdef class {{name}}(CyLossFunction):
):
loss_out[i] = sample_weight[i] * {{closs}}(y_true[i], raw_prediction[i]{{with_param}})

return np.asarray(loss_out)

{{if closs_grad is not None}}
def loss_gradient(
self,
@@ -1077,7 +1058,6 @@ cdef class {{name}}(CyLossFunction):
loss_out[i] = sample_weight[i] * dbl2.val1
gradient_out[i] = sample_weight[i] * dbl2.val2

return np.asarray(loss_out), np.asarray(gradient_out)
{{endif}}

def gradient(
@@ -1103,8 +1083,6 @@
):
gradient_out[i] = sample_weight[i] * {{cgrad}}(y_true[i], raw_prediction[i]{{with_param}})

return np.asarray(gradient_out)

def gradient_hessian(
self,
const floating_in[::1] y_true, # IN
@@ -1134,8 +1112,6 @@
gradient_out[i] = sample_weight[i] * dbl2.val1
hessian_out[i] = sample_weight[i] * dbl2.val2

return np.asarray(gradient_out), np.asarray(hessian_out)

{{endfor}}


@@ -1216,8 +1192,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(loss_out)

def loss_gradient(
self,
const floating_in[::1] y_true, # IN
@@ -1278,8 +1252,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(loss_out), np.asarray(gradient_out)

def gradient(
self,
const floating_in[::1] y_true, # IN
@@ -1327,8 +1299,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(gradient_out)

def gradient_hessian(
self,
const floating_in[::1] y_true, # IN
@@ -1381,9 +1351,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(gradient_out), np.asarray(hessian_out)


# This method simplifies the implementation of hessp in linear models,
# i.e. the matrix-vector product of the full hessian, not only of the
# diagonal (in the classes) approximation as implemented above.
@@ -1434,5 +1401,3 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):
gradient_out[i, k] = (proba_out[i, k] - (y_true[i] == k)) * sample_weight[i]

free(p)

return np.asarray(gradient_out), np.asarray(proba_out)
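Taken together, the removals in this template follow one pattern: every Cython `loss`, `gradient`, `loss_gradient`, `gradient_hessian` and `gradient_proba` method drops its trailing `return np.asarray(...)` and becomes a pure in-place writer, so the Cython layer no longer allocates a fresh ndarray wrapper around its output memoryview on every call. The sketch below illustrates the old and new contracts in plain Python rather than the actual Cython/Tempita code, with half squared error standing in for the generated losses and the `sample_weight is None` branch omitted:

```python
import numpy as np

def cy_loss_before(y_true, raw_prediction, sample_weight, loss_out, n_threads=1):
    # Old contract: fill `loss_out`, then return np.asarray(loss_out). On a Cython
    # memoryview this builds a new ndarray wrapper on every call; that per-call
    # return value is the allocation this commit removes (the leak was seen in PyPy).
    for i in range(y_true.shape[0]):
        loss_out[i] = sample_weight[i] * 0.5 * (y_true[i] - raw_prediction[i]) ** 2
    return np.asarray(loss_out)

def cy_loss_after(y_true, raw_prediction, sample_weight, loss_out, n_threads=1):
    # New contract: fill `loss_out` and return nothing; handing the buffer back to
    # callers is now the job of the Python wrappers in sklearn/_loss/loss.py.
    for i in range(y_true.shape[0]):
        loss_out[i] = sample_weight[i] * 0.5 * (y_true[i] - raw_prediction[i]) ** 2
```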
15 changes: 10 additions & 5 deletions sklearn/_loss/loss.py
@@ -189,13 +189,14 @@ def loss(
if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
raw_prediction = raw_prediction.squeeze(1)

return self.closs.loss(
self.closs.loss(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=loss_out,
n_threads=n_threads,
)
return loss_out

def loss_gradient(
self,
@@ -250,14 +251,15 @@ def loss_gradient(
if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
gradient_out = gradient_out.squeeze(1)

return self.closs.loss_gradient(
self.closs.loss_gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=loss_out,
gradient_out=gradient_out,
n_threads=n_threads,
)
return loss_out, gradient_out

def gradient(
self,
@@ -299,13 +301,14 @@ def gradient(
if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
gradient_out = gradient_out.squeeze(1)

return self.closs.gradient(
self.closs.gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=gradient_out,
n_threads=n_threads,
)
return gradient_out

def gradient_hessian(
self,
@@ -363,14 +366,15 @@ def gradient_hessian(
if hessian_out.ndim == 2 and hessian_out.shape[1] == 1:
hessian_out = hessian_out.squeeze(1)

return self.closs.gradient_hessian(
self.closs.gradient_hessian(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=gradient_out,
hessian_out=hessian_out,
n_threads=n_threads,
)
return gradient_out, hessian_out

def __call__(self, y_true, raw_prediction, sample_weight=None, n_threads=1):
"""Compute the weighted average loss.
@@ -1075,14 +1079,15 @@ def gradient_proba(
elif proba_out is None:
proba_out = np.empty_like(gradient_out)

return self.closs.gradient_proba(
self.closs.gradient_proba(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=gradient_out,
proba_out=proba_out,
n_threads=n_threads,
)
return gradient_out, proba_out


class ExponentialLoss(BaseLoss):
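At the Python level the public behaviour is meant to stay the same: each wrapper in `sklearn/_loss/loss.py` still returns the loss/gradient arrays, but it now returns the caller-provided (or freshly allocated) buffer directly instead of whatever the Cython method used to hand back. A usage sketch, assuming the private `sklearn._loss.loss.HalfSquaredError` class and the keyword names shown in the diff (illustrative only, not a supported public API):

```python
import numpy as np
from sklearn._loss.loss import HalfSquaredError  # private module, used here for illustration

loss = HalfSquaredError()
y_true = np.array([0.0, 1.0, 2.0])
raw_prediction = np.array([0.5, 1.0, 1.5])
loss_out = np.empty_like(y_true)

# The Cython method writes into `loss_out` in place; the Python wrapper then returns
# that same buffer, so no intermediate array is produced on the Cython side.
result = loss.loss(y_true=y_true, raw_prediction=raw_prediction, loss_out=loss_out)
assert result is loss_out
print(result)  # half squared error: [0.125 0.    0.125]
```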
50 changes: 24 additions & 26 deletions sklearn/_loss/tests/test_loss.py
@@ -383,34 +383,32 @@ def test_loss_same_as_C_functions(loss, sample_weight):
out_g2 = np.empty_like(raw_prediction)
out_h1 = np.empty_like(raw_prediction)
out_h2 = np.empty_like(raw_prediction)
assert_allclose(
loss.loss(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=out_l1,
),
loss.closs.loss(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=out_l2,
),
loss.loss(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=out_l1,
)
assert_allclose(
loss.gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=out_g1,
),
loss.closs.gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=out_g2,
),
loss.closs.loss(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=out_l2,
),
assert_allclose(out_l1, out_l2)
loss.gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=out_g1,
)
loss.closs.gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=out_g2,
)
assert_allclose(out_g1, out_g2)
loss.closs.loss_gradient(
y_true=y_true,
raw_prediction=raw_prediction,