Skip to content

Commit

Permalink
NumPy 2.0 compatibility (#1632)
Browse files Browse the repository at this point in the history
* Replace `np.in1d` with `np.isin`

This was found by running Ruff's NPY201 rule,
which flags NumPy APIs that are deprecated or
removed in NumPy 2.0.

* Clean up setup script slightly

* Bump Cython, NumPy build-time requirements

* Run linter

* Remove <2 pin on NumPy runtime dependency

* Update lockfile

* Reset and exercise poetry caches

* Increment virtual environment cache

* Bump scikit-learn, polars, scipy, pandas, matplotlib, sympy

* Update lockfile

* Bump build number to invalidate caches

* Ensure Python floats are output

* ruff

* fix remaining tests

* format

* Redefine

* Increment cache number

* Include poetry cache key in venv cache key

* Update lockfile for Polars

* Revert "Include poetry cache key in venv cache key"

This reverts commit 2548909.

* Use `cache@v4` and `checkout@v4`

* Apply suggestions from code review

---------

Co-authored-by: Max Halford <[email protected]>
Co-authored-by: Saulo Martiello Mastelini <[email protected]>
  • Loading branch information
3 people authored Nov 19, 2024
1 parent ada5ada commit e069b67
Show file tree
Hide file tree
Showing 31 changed files with 2,231 additions and 2,006 deletions.
2 changes: 1 addition & 1 deletion .github/actions/install-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ runs:
uses: actions/cache@v4
with:
path: ~/.local # the path depends on the OS
key: poetry-2 # increment to reset cache
key: poetry-2 # modify to reset cache

- name: Install poetry
uses: snok/install-poetry@v1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/code-quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
ubuntu:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Build River
uses: ./.github/actions/install-env
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Build River
uses: ./.github/actions/install-env
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up rust
if: matrix.os != 'ubuntu-20.04' && matrix.os != 'ubuntu-22.04'
Expand Down Expand Up @@ -104,7 +104,7 @@ jobs:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Build River
uses: ./.github/actions/install-env
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ jobs:
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Build River
uses: ./.github/actions/install-env
with:
python-version: "3.12"

- name: Cache River datasets
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/river_data
key: ${{ runner.os }}

- name: Cache scikit-learn datasets
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/scikit_learn_data
key: ${{ runner.os }}
Expand Down
31 changes: 9 additions & 22 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,23 @@
import platform
from distutils.command.build_ext import build_ext
from distutils.errors import CCompilerError, DistutilsExecError, DistutilsPlatformError

import numpy
import setuptools
from Cython.Build import cythonize
from setuptools.command.build_ext import build_ext
from setuptools.errors import CCompilerError
from setuptools_rust import Binding, RustExtension

try:
from numpy import __version__ as numpy_version
from numpy import get_include
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy"])
from numpy import __version__ as numpy_version
from numpy import get_include

try:
from Cython.Build import cythonize
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "Cython"])
from Cython.Build import cythonize # type: ignore


ext_modules = cythonize(
module_list=[
setuptools.Extension(
"*",
sources=["**/*.pyx"],
include_dirs=[get_include()],
sources=["river/**/*.pyx"],
include_dirs=[numpy.get_include()],
libraries=[] if platform.system() == "Windows" else ["m"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
)
],
compiler_directives={
"language_level": 3,
"binding": True,
"embedsignature": True,
},
Expand All @@ -47,13 +34,13 @@ class ExtBuilder(build_ext):
def run(self):
try:
build_ext.run(self)
except (DistutilsPlatformError, FileNotFoundError):
except (FileNotFoundError):
raise BuildFailed("File not found. Could not compile C extension.")

def build_extension(self, ext):
try:
build_ext.build_extension(self, ext)
except (CCompilerError, DistutilsExecError, DistutilsPlatformError, ValueError):
except (CCompilerError, ValueError):
raise BuildFailed("Could not compile C extension.")


Expand Down
4,059 changes: 2,142 additions & 1,917 deletions poetry.lock

Large diffs are not rendered by default.

32 changes: 19 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
[build-system]
requires = ["poetry-core>=1.0.0", "cython", "numpy", "setuptools", "wheel", "setuptools-rust"]
requires = [
"poetry-core>=1.0.0",
"cython>3",
"numpy>=2.0.0",
"setuptools>=70.1.0",
"setuptools-rust",
]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
Expand All @@ -20,7 +26,7 @@ include = [
"river/datasets/*.zip",
"river/stream/*.zip",
"Cargo.toml",
"rust_src/**/*"
"rust_src/**/*",
]

[tool.poetry.build]
Expand All @@ -29,34 +35,34 @@ script = "build.py"

[tool.poetry.dependencies]
python = "^3.9"
numpy = "^1.23.0"
scipy = "^1.12.1"
pandas = "^2.1"
numpy = ">=1.23.0"
scipy = "^1.13.1"
pandas = "^2.2.3"

[tool.poetry.group.dev.dependencies]
graphviz = "^0.20.1"
gymnasium = "^0.29.0"
matplotlib = "^3.0.2"
matplotlib = "^3.8.4"
mypy = "^1.11.1"
pre-commit = "^3.5.0"
pytest = "^7.4.2"
ruff = "^0.4.10"
scikit-learn = "^1.3.1"
scikit-learn = "^1.5.1"
sqlalchemy = "^2.0.22"
sympy = "^1.10.1"
pytest-xdist = {extras = ["psutil"], version = "^3.3.1"}
sympy = "^1.12.1"
pytest-xdist = { extras = ["psutil"], version = "^3.3.1" }
ipykernel = "^6.26.0"
ipython = "^8.17.2"
rich = "^13.6.0"
jupyter = "^1.0.0"
mike = "^2.0.0"
polars = "^0.20.8"
polars = "^1.1.0"

[tool.poetry.group.compat]
optional = true

[tool.poetry.group.compat.dependencies]
scikit-learn = "^1.0.1"
scikit-learn = "^1.5.1"
sqlalchemy = "^2.0.0"

[tool.poetry.group.docs]
Expand Down Expand Up @@ -84,7 +90,7 @@ optional = true

[tool.poetry.group.benchmark.dependencies]
"dominate" = "2.8.0"
"scikit-learn" = "1.3.1"
"scikit-learn" = "1.5.1"
"tabulate" = "0.9.0"
"vowpalwabbit" = "9.9.0"
"watermark" = "2.4.3"
Expand Down Expand Up @@ -161,7 +167,7 @@ module = [
"requests.*",
"gymnasium.*",
"sympy.*",
"polars.*"
"polars.*",
]
ignore_missing_imports = true

Expand Down
4 changes: 2 additions & 2 deletions river/compose/test_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ def test_issue_1253():
>>> model = group1 + group1 * group2
>>> XT = model.transform_many(X)
>>> XT.memory_usage().sum() // 1000
>>> XT.memory_usage().sum().item() // 1000
85
>>> XT.sparse.to_dense().memory_usage().sum() // 1000
>>> XT.sparse.to_dense().memory_usage().sum().item() // 1000
4455
>>> X, y = datasets.make_regression(n_samples=6, n_features=2)
Expand Down
2 changes: 1 addition & 1 deletion river/datasets/synth/anomaly_sine.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@ def __iter__(self):
self._generate_data()

for xi, yi in itertools.zip_longest(self.X, self.y if hasattr(self.y, "__iter__") else []):
yield dict(zip(["sine", "cosine"], xi)), bool(yi)
yield dict(zip(["sine", "cosine"], xi.tolist())), bool(yi)
5 changes: 4 additions & 1 deletion river/datasets/synth/logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ def __iter__(self):
X, Y = self._make_logical(n_tiles=self.n_tiles, shuffle=self.shuffle)

for xi, yi in itertools.zip_longest(X, Y if hasattr(Y, "__iter__") else []):
yield dict(zip(self.feature_names, xi)), dict(zip(self.target_names, yi))
yield (
dict(zip(self.feature_names, xi.tolist())),
dict(zip(self.target_names, yi.tolist())),
)

def _make_logical(self, n_tiles: int = 1, shuffle: bool = True):
"""Make toy dataset"""
Expand Down
2 changes: 1 addition & 1 deletion river/facto/ffm.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FFMClassifier(FFM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/fm.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FMClassifier(FM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/fwfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FwFMClassifier(FwFM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/hofm.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class HOFMClassifier(HOFM, base.Classifier):
Expand Down
10 changes: 6 additions & 4 deletions river/forest/adaptive_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,12 @@ def learn_one(self, x: dict, y: base.typing.Target, **kwargs):
# Update performance evaluator
self._metrics[i].update(
y_true=y,
y_pred=model.predict_proba_one(x)
if isinstance(self.metric, metrics.base.ClassificationMetric)
and not self.metric.requires_labels
else y_pred,
y_pred=(
model.predict_proba_one(x)
if isinstance(self.metric, metrics.base.ClassificationMetric)
and not self.metric.requires_labels
else y_pred
),
)

k = poisson(rate=self.lambda_value, rng=self._rng)
Expand Down
2 changes: 1 addition & 1 deletion river/linear_model/bayesian_lin_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def predict_one(self, x, with_dist=False):
"""

# Bishop equation 3.58
y_pred_mean = utils.math.dot(self._m, x)
y_pred_mean = 0.0 if not len(self._m) else utils.math.dot(self._m, x).item()
if not with_dist:
return y_pred_mean

Expand Down
2 changes: 1 addition & 1 deletion river/naive_bayes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def one_hot_encode(y: pd.Series) -> pd.DataFrame:
"""
classes = np.unique(y)
indices = np.searchsorted(classes, y)
indptr = np.hstack((0, np.cumsum(np.in1d(y, classes))))
indptr = np.hstack((0, np.cumsum(np.isin(y, classes))))
data = np.empty_like(indices)
data.fill(1)
return pd.DataFrame.sparse.from_spmatrix(
Expand Down
2 changes: 1 addition & 1 deletion river/optim/initializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class Normal(Initializer):
>>> init = optim.initializers.Normal(mu=0, sigma=1, seed=42)
>>> init(shape=1)
0.496714
np.float64(0.4967141...)
>>> init(shape=2)
array([-0.1382643 , 0.64768854])
Expand Down
4 changes: 2 additions & 2 deletions river/optim/newton.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def sherman_morrison(A_inv: dict, u: dict, v: dict) -> dict:

den = 1 + utils.math.dot(utils.math.dotvecmat(u, A_inv), v)

for k, v in utils.math.matmul2d(
for k, val in utils.math.matmul2d(
utils.math.matmul2d(A_inv, utils.math.outer(u, v)), A_inv
).items():
A_inv[k] = A_inv.get(k, 0) - v / den
A_inv[k] = A_inv.get(k, 0) - val / den

return A_inv

Expand Down
2 changes: 1 addition & 1 deletion river/preprocessing/lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def transform_one(self, x):
# Sample empirical topic assignment:
_, components = self._compute_statistics_components(words_indexes_list)

return dict(enumerate(components))
return dict(enumerate(components.tolist()))

def _update_indexes(self, word_list: typing.Iterable[str]):
"""
Expand Down
8 changes: 5 additions & 3 deletions river/preprocessing/scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,12 @@ def learn_many(self, X: pd.DataFrame):
a = old_count / (old_count + new_count)
b = new_count / (old_count + new_count)

self.means[col] = a * old_mean + b * new_mean
self.means[col] = (a * old_mean + b * new_mean).item()
if self.with_std:
self.vars[col] = a * old_var + b * new_var + a * b * (old_mean - new_mean) ** 2
self.counts[col] += new_count
self.vars[col] = (
a * old_var + b * new_var + a * b * (old_mean - new_mean) ** 2
).item()
self.counts[col] += new_count.item()

def transform_many(self, X: pd.DataFrame):
"""Scale a mini-batch of features.
Expand Down
10 changes: 5 additions & 5 deletions river/proba/beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,20 @@ def revert(self, x):
else:
self.beta -= 1

def __call__(self, p: float):
def __call__(self, p: float) -> float:
return (
p ** (self.alpha - 1) * (1 - p) ** (self.beta - 1) / _beta_func(self.alpha, self.beta)
)

def sample(self):
def sample(self) -> float:
return self._rng.betavariate(self.alpha, self.beta)

@property
def mode(self):
def mode(self) -> float:
try:
return (self.alpha - 1) / (self.alpha + self.beta - 2)
except ZeroDivisionError:
return 0.5

def cdf(self, x):
return scipy.special.betainc(self.alpha, self.beta, x)
def cdf(self, x) -> float:
return scipy.special.betainc(self.alpha, self.beta, x).item()
Loading

0 comments on commit e069b67

Please sign in to comment.