STY Enables black with experimental_string_processing=true #20412

Merged, 2 commits, Jun 29, 2021
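This PR reformats the repository with black's experimental string processing enabled. That mode folds implicitly concatenated string literals (adjacent literals such as "foo " "bar") into a single literal, lets black pick its own wrap points for long strings, and strips redundant parentheses around lone literals; the hunks below are all instances of this rewrite. A minimal sketch of why the change is behavior-preserving, using a string from the bench_covertype.py hunk below:

    # Implicit concatenation happens at compile time, so merging the literals
    # is a pure style change: both spellings denote the same string.
    before = "Allow to choose between fortran and C ordered " "data"
    after = "Allow to choose between fortran and C ordered data"
    assert before == after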
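The configuration change that turns the flag on is not shown in this diff. In the black releases current at the time (20.8b1 through 21.x) the feature was exposed as the --experimental-string-processing command-line flag and the matching key under [tool.black] in pyproject.toml; black 22.x later folded it into the preview style. A hedged sketch of reproducing the rewrite through black's Python API, assuming a 21.x release where Mode still accepts experimental_string_processing:

    import black

    # The input mirrors the bench_hist_gradient_boosting_higgsboson.py hunk below.
    src = (
        'URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00280/"'
        ' "HIGGS.csv.gz"\n'
    )
    mode = black.Mode(line_length=88, experimental_string_processing=True)
    # Prints the merged single-literal form of the assignment.
    print(black.format_str(src, mode=mode))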
8 changes: 5 additions & 3 deletions benchmarks/bench_covertype.py
@@ -125,16 +125,18 @@ def load_data(dtype=np.float32, order="C", random_state=13):
     nargs="?",
     default=1,
     type=int,
-    help="Number of concurrently running workers for "
-    "models that support parallelism.",
+    help=(
+        "Number of concurrently running workers for "
+        "models that support parallelism."
+    ),
 )
 parser.add_argument(
     "--order",
     nargs="?",
     default="C",
     type=str,
     choices=["F", "C"],
-    help="Allow to choose between fortran and C ordered " "data",
+    help="Allow to choose between fortran and C ordered data",
 )
 parser.add_argument(
     "--random-seed",
2 changes: 1 addition & 1 deletion benchmarks/bench_hist_gradient_boosting_adult.py
@@ -42,7 +42,7 @@ def predict(est, data_test, target_test):
     toc = time()
     roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1])
     acc = accuracy_score(target_test, predicted_test)
-    print(f"predicted in {toc - tic:.3f}s, " f"ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")
+    print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")


 data = fetch_openml(data_id=179, as_frame=False)  # adult dataset
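Adjacent f-string literals are implicitly concatenated as well, so merging the two pieces of the print call above cannot change its output. A quick sketch, with invented values:

    tic, toc, roc_auc, acc = 0.0, 1.234, 0.9876, 0.9123

    split = f"predicted in {toc - tic:.3f}s, " f"ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}"
    merged = f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}"
    # "{acc :.4f}" (stray space before the format spec) is valid f-string
    # syntax; the rewrite leaves string contents untouched.
    assert split == merged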
4 changes: 2 additions & 2 deletions benchmarks/bench_hist_gradient_boosting_higgsboson.py
@@ -27,7 +27,7 @@
 args = parser.parse_args()

 HERE = os.path.dirname(__file__)
-URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00280/" "HIGGS.csv.gz"
+URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz"
 m = Memory(location=args.cache_loc, mmap_mode="r")

 n_leaf_nodes = args.n_leaf_nodes
@@ -71,7 +71,7 @@ def predict(est, data_test, target_test):
     toc = time()
     roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1])
     acc = accuracy_score(target_test, predicted_test)
-    print(f"predicted in {toc - tic:.3f}s, " f"ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")
+    print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")


 df = load_data()
2 changes: 1 addition & 1 deletion benchmarks/bench_isolation_forest.py
@@ -143,7 +143,7 @@ def print_outlier_ratio(y):
     predict_time = time() - tstart
     fpr, tpr, thresholds = roc_curve(y_test, scoring)
     auc_score = auc(fpr, tpr)
-    label = "%s (AUC: %0.3f, train_time= %0.2fs, " "test_time= %0.2fs)" % (
+    label = "%s (AUC: %0.3f, train_time= %0.2fs, test_time= %0.2fs)" % (
         dat,
         auc_score,
         fit_time,
2 changes: 1 addition & 1 deletion benchmarks/bench_isotonic.py
@@ -61,7 +61,7 @@ def bench_isotonic_regression(Y):
     "--iterations",
     type=int,
     required=True,
-    help="Number of iterations to average timings over " "for each problem size",
+    help="Number of iterations to average timings over for each problem size",
 )
 parser.add_argument(
     "--log_min_problem_size",
6 changes: 2 additions & 4 deletions benchmarks/bench_lof.py
@@ -98,10 +98,8 @@
     fpr,
     tpr,
     lw=1,
-    label=(
-        "ROC for %s (area = %0.3f, train-time: %0.2fs)"
-        % (dataset_name, AUC, fit_time)
-    ),
+    label="ROC for %s (area = %0.3f, train-time: %0.2fs)"
+    % (dataset_name, AUC, fit_time),
 )

 plt.xlim([-0.05, 1.05])
18 changes: 11 additions & 7 deletions benchmarks/bench_mnist.py
@@ -132,16 +132,18 @@ def load_data(dtype=np.float32, order="F"):
     nargs="?",
     default=1,
     type=int,
-    help="Number of concurrently running workers for "
-    "models that support parallelism.",
+    help=(
+        "Number of concurrently running workers for "
+        "models that support parallelism."
+    ),
 )
 parser.add_argument(
     "--order",
     nargs="?",
     default="C",
     type=str,
     choices=["F", "C"],
-    help="Allow to choose between fortran and C ordered " "data",
+    help="Allow to choose between fortran and C ordered data",
 )
 parser.add_argument(
     "--random-seed",
@@ -215,15 +217,17 @@ def load_data(dtype=np.float32, order="F"):
 print("Classification performance:")
 print("===========================")
 print(
-    "{0: <24} {1: >10} {2: >11} {3: >12}"
-    "".format("Classifier ", "train-time", "test-time", "error-rate")
+    "{0: <24} {1: >10} {2: >11} {3: >12}".format(
+        "Classifier ", "train-time", "test-time", "error-rate"
+    )
 )
 print("-" * 60)
 for name in sorted(args["classifiers"], key=error.get):

     print(
-        "{0: <23} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}"
-        "".format(name, train_time[name], test_time[name], error[name])
+        "{0: <23} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}".format(
+            name, train_time[name], test_time[name], error[name]
+        )
     )

 print()
12 changes: 7 additions & 5 deletions benchmarks/bench_multilabel_metrics.py
@@ -155,14 +155,15 @@ def _plot(
     "metrics",
     nargs="*",
     default=sorted(METRICS),
-    help="Specifies metrics to benchmark, defaults to all. "
-    "Choices are: {}".format(sorted(METRICS)),
+    help="Specifies metrics to benchmark, defaults to all. Choices are: {}".format(
+        sorted(METRICS)
+    ),
 )
 ap.add_argument(
     "--formats",
     nargs="+",
     choices=sorted(FORMATS),
-    help="Specifies multilabel formats to benchmark " "(defaults to all).",
+    help="Specifies multilabel formats to benchmark (defaults to all).",
 )
 ap.add_argument(
     "--samples", type=int, default=1000, help="The number of samples to generate"
@@ -178,8 +179,9 @@ def _plot(
     "--plot",
     choices=["classes", "density", "samples"],
     default=None,
-    help="Plot time with respect to this parameter varying "
-    "up to the specified value",
+    help=(
+        "Plot time with respect to this parameter varying up to the specified value"
+    ),
 )
 ap.add_argument(
     "--n-steps", default=10, type=int, help="Plot this many points for each metric"
15 changes: 6 additions & 9 deletions benchmarks/bench_plot_incremental_pca.py
@@ -40,8 +40,7 @@ def plot_feature_times(all_times, batch_size, all_components, data):
     )
     plt.legend(loc="upper left")
     plt.suptitle(
-        "Algorithm runtime vs. n_components\n \
-        LFW, size %i x %i"
+        "Algorithm runtime vs. n_components\n LFW, size %i x %i"
         % data.shape
     )
     plt.xlabel("Number of components (out of max %i)" % data.shape[1])
@@ -57,7 +56,7 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
         label="IncrementalPCA, bsize=%i" % batch_size,
     )
     plt.legend(loc="lower left")
-    plt.suptitle("Algorithm error vs. n_components\n" "LFW, size %i x %i" % data.shape)
+    plt.suptitle("Algorithm error vs. n_components\nLFW, size %i x %i" % data.shape)
     plt.xlabel("Number of components (out of max %i)" % data.shape[1])
     plt.ylabel("Mean absolute error")

@@ -68,9 +67,8 @@ def plot_batch_times(all_times, n_features, all_batch_sizes, data):
     plot_results(all_batch_sizes, all_times["ipca"], label="IncrementalPCA")
     plt.legend(loc="lower left")
     plt.suptitle(
-        "Algorithm runtime vs. batch_size for n_components %i\n \
-        LFW, size %i x %i"
-        % (n_features, data.shape[0], data.shape[1])
+        "Algorithm runtime vs. batch_size for n_components %i\n LFW,"
+        " size %i x %i" % (n_features, data.shape[0], data.shape[1])
     )
     plt.xlabel("Batch size")
     plt.ylabel("Time (seconds)")
@@ -82,9 +80,8 @@ def plot_batch_errors(all_errors, n_features, all_batch_sizes, data):
     plot_results(all_batch_sizes, all_errors["ipca"], label="IncrementalPCA")
     plt.legend(loc="lower left")
     plt.suptitle(
-        "Algorithm error vs. batch_size for n_components %i\n \
-        LFW, size %i x %i"
-        % (n_features, data.shape[0], data.shape[1])
+        "Algorithm error vs. batch_size for n_components %i\n LFW,"
+        " size %i x %i" % (n_features, data.shape[0], data.shape[1])
     )
     plt.xlabel("Batch size")
     plt.ylabel("Mean absolute error")
14 changes: 8 additions & 6 deletions benchmarks/bench_plot_nmf.py
@@ -254,18 +254,19 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):

         if not isinstance(n_components, numbers.Integral) or n_components <= 0:
             raise ValueError(
-                "Number of components must be a positive integer;"
-                " got (n_components=%r)" % n_components
+                "Number of components must be a positive integer; got (n_components=%r)"
+                % n_components
             )
         if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0:
             raise ValueError(
                 "Maximum number of iterations must be a positive "
-                "integer; got (max_iter=%r)" % self.max_iter
+                "integer; got (max_iter=%r)"
+                % self.max_iter
             )
         if not isinstance(self.tol, numbers.Number) or self.tol < 0:
             raise ValueError(
-                "Tolerance for stopping criteria must be "
-                "positive; got (tol=%r)" % self.tol
+                "Tolerance for stopping criteria must be positive; got (tol=%r)"
+                % self.tol
             )

         # check W and H, or initialize them
@@ -306,7 +307,8 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
         if n_iter == self.max_iter and self.tol > 0:
             warnings.warn(
                 "Maximum number of iteration %d reached. Increase it"
-                " to improve convergence." % self.max_iter,
+                " to improve convergence."
+                % self.max_iter,
                 ConvergenceWarning,
             )

15 changes: 9 additions & 6 deletions benchmarks/bench_random_projections.py
@@ -118,7 +118,7 @@ def print_row(clf_type, time_fit, time_transform):
     "--n-components",
     dest="n_components",
     default="auto",
-    help="Size of the random subspace." " ('auto' or int > 0)",
+    help="Size of the random subspace. ('auto' or int > 0)",
 )

 op.add_option(
@@ -149,8 +149,9 @@ def print_row(clf_type, time_fit, time_transform):
     "--density",
     dest="density",
     default=1 / 3,
-    help="Density used by the sparse random projection."
-    " ('auto' or float (0.0, 1.0]",
+    help=(
+        "Density used by the sparse random projection. ('auto' or float (0.0, 1.0]"
+    ),
 )

 op.add_option(
@@ -166,9 +167,11 @@ def print_row(clf_type, time_fit, time_transform):
     dest="selected_transformers",
     default="GaussianRandomProjection,SparseRandomProjection",
     type=str,
-    help="Comma-separated list of transformer to benchmark. "
-    "Default: %default. Available: "
-    "GaussianRandomProjection,SparseRandomProjection",
+    help=(
+        "Comma-separated list of transformer to benchmark. "
+        "Default: %default. Available: "
+        "GaussianRandomProjection,SparseRandomProjection"
+    ),
 )

 op.add_option(
6 changes: 4 additions & 2 deletions benchmarks/bench_sample_without_replacement.py
@@ -72,8 +72,10 @@ def bench_sample(sampling, n_population, n_samples):
     dest="selected_algorithm",
     default=default_algorithms,
     type=str,
-    help="Comma-separated list of transformer to benchmark. "
-    "Default: %default. \nAvailable: %default",
+    help=(
+        "Comma-separated list of transformer to benchmark. "
+        "Default: %default. \nAvailable: %default"
+    ),
 )

 # op.add_option("--random-seed",
16 changes: 10 additions & 6 deletions benchmarks/bench_tsne_mnist.py
@@ -74,26 +74,30 @@ def sanitize(filename):
 parser.add_argument(
     "--bhtsne",
     action="store_true",
-    help="if set and the reference bhtsne code is "
-    "correctly installed, run it in the benchmark.",
+    help=(
+        "if set and the reference bhtsne code is "
+        "correctly installed, run it in the benchmark."
+    ),
 )
 parser.add_argument(
     "--all",
     action="store_true",
-    help="if set, run the benchmark with the whole MNIST."
-    "dataset. Note that it will take up to 1 hour.",
+    help=(
+        "if set, run the benchmark with the whole MNIST."
+        "dataset. Note that it will take up to 1 hour."
+    ),
 )
 parser.add_argument(
     "--profile",
     action="store_true",
-    help="if set, run the benchmark with a memory " "profiler.",
+    help="if set, run the benchmark with a memory profiler.",
 )
 parser.add_argument("--verbose", type=int, default=0)
 parser.add_argument(
     "--pca-components",
     type=int,
     default=50,
-    help="Number of principal components for " "preprocessing.",
+    help="Number of principal components for preprocessing.",
 )
 args = parser.parse_args()

12 changes: 6 additions & 6 deletions build_tools/generate_authors_table.py
@@ -117,12 +117,12 @@ def key(profile):

 def generate_table(contributors):
     lines = [
-        (".. raw :: html\n"),
-        (" <!-- Generated by generate_authors_table.py -->"),
-        (' <div class="sk-authors-container">'),
-        (" <style>"),
-        (" img.avatar {border-radius: 10px;}"),
-        (" </style>"),
+        ".. raw :: html\n",
+        " <!-- Generated by generate_authors_table.py -->",
+        ' <div class="sk-authors-container">',
+        " <style>",
+        " img.avatar {border-radius: 10px;}",
+        " </style>",
     ]
     for contributor in contributors:
         lines.append(" <div>")
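Besides merging literals, the string processor drops redundant parentheses around lone string literals, which is the only change to generate_table above. The parentheses are a no-op, as a quick check shows:

    # Parenthesizing a bare string literal does not create a tuple or change
    # the value, so both lists are identical.
    with_parens = [(".. raw :: html\n"), (" <style>")]
    without_parens = [".. raw :: html\n", " <style>"]
    assert with_parens == without_parens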
14 changes: 7 additions & 7 deletions doc/conf.py
@@ -61,7 +61,7 @@
     mathjax_path = ""
 else:
     extensions.append("sphinx.ext.mathjax")
-    mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/" "tex-chtml.js"
+    mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"

 autodoc_default_options = {"members": True, "inherited-members": True}

@@ -285,7 +285,7 @@
 v = parse(release)
 if v.release is None:
     raise ValueError(
-        "Ill-formed version: {!r}. Version should follow " "PEP440".format(version)
+        "Ill-formed version: {!r}. Version should follow PEP440".format(version)
     )

 if v.is_devrelease:
@@ -435,9 +435,7 @@ def generate_min_dependency_table(app):

     for package, (version, tags) in dependent_packages.items():
         output.write(
-            f"{package:<{package_header_len}} "
-            f"{version:<{version_header_len}} "
-            f"{tags}\n"
+            f"{package:<{package_header_len}} {version:<{version_header_len}} {tags}\n"
         )

     output.write(
@@ -494,8 +492,10 @@ def setup(app):
 warnings.filterwarnings(
     "ignore",
     category=UserWarning,
-    message="Matplotlib is currently using agg, which is a"
-    " non-GUI backend, so cannot show the figure.",
+    message=(
+        "Matplotlib is currently using agg, which is a"
+        " non-GUI backend, so cannot show the figure."
+    ),
 )


4 changes: 1 addition & 3 deletions doc/conftest.py
@@ -92,9 +92,7 @@ def setup_unsupervised_learning():
     try:
         import skimage  # noqa
     except ImportError:
-        raise SkipTest(
-            "Skipping unsupervised_learning.rst, scikit-image " "not installed"
-        )
+        raise SkipTest("Skipping unsupervised_learning.rst, scikit-image not installed")
     # ignore deprecation warnings from scipy.misc.face
     warnings.filterwarnings(
         "ignore", "The binary mode of fromstring", DeprecationWarning