-
-
Notifications
You must be signed in to change notification settings - Fork 8.1k
Fix PDF bloat for off-axis scatter with per-point colors #30746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
4104dd5
f95d761
1f3b3c3
1c3e605
3ef8b23
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2104,12 +2104,26 @@ def draw_path_collection(self, gc, master_transform, paths, all_transforms, | |
|
|
||
| padding = np.max(linewidths) | ||
| path_codes = [] | ||
|
|
||
| # Calculate maximum marker extent for conservative bounds checking. | ||
| # We need to account for marker size, not just position. | ||
| max_marker_extent = 0 | ||
| for i, (path, transform) in enumerate(self._iter_collection_raw_paths( | ||
| master_transform, paths, all_transforms)): | ||
| if len(path.vertices): | ||
| # Get the bounding box of the transformed marker path. | ||
| # Use get_extents() which is more efficient than transforming | ||
| # all vertices, and add padding for stroke width. | ||
| bbox = path.get_extents(transform) | ||
| max_marker_extent = max(max_marker_extent, | ||
| bbox.width / 2, bbox.height / 2) | ||
| name = self.file.pathCollectionObject( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this write something into the file our only update our Python side sate?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does not write into file immediately but seems it writes after all drawing is done. Let me fix this by creating path templates only for used path_ids and compute extents for creted paths. |
||
| gc, path, transform, padding, filled, stroked) | ||
| path_codes.append(name) | ||
|
|
||
| # Add padding for stroke width. | ||
| max_marker_extent += padding | ||
|
|
||
| output = self.file.output | ||
| output(*self.gc.push()) | ||
| lastx, lasty = 0, 0 | ||
|
|
@@ -2118,6 +2132,15 @@ def draw_path_collection(self, gc, master_transform, paths, all_transforms, | |
| facecolors, edgecolors, linewidths, linestyles, | ||
| antialiaseds, urls, offset_position, hatchcolors=hatchcolors): | ||
|
|
||
| # Skip markers outside visible canvas bounds to reduce PDF size. | ||
| # Add max_marker_extent margin to account for marker size - a marker | ||
| # may be partially visible even if its center is outside the canvas. | ||
| canvas_width = self.file.width * 72 | ||
| canvas_height = self.file.height * 72 | ||
| if not (-max_marker_extent <= xo <= canvas_width + max_marker_extent | ||
|
||
| and -max_marker_extent <= yo <= canvas_height + max_marker_extent): | ||
| continue | ||
|
|
||
| self.check_gc(gc0, rgbFace) | ||
| dx, dy = xo - lastx, yo - lasty | ||
| output(1, 0, 0, 1, dx, dy, Op.concat_matrix, path_id, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -478,3 +478,112 @@ def test_font_bitstream_charter(): | |
| ax.text(0.1, 0.3, r"fi ffl 1234", usetex=True, fontsize=50) | ||
| ax.set_xticks([]) | ||
| ax.set_yticks([]) | ||
|
|
||
|
|
||
| def test_scatter_offaxis_colored_pdf_size(): | ||
| """ | ||
| Test that off-axis scatter plots with per-point colors don't bloat PDFs. | ||
|
|
||
| Regression test for issue #2488. When scatter points with per-point colors | ||
| are completely outside the visible axes, the PDF backend should skip | ||
| writing those markers to significantly reduce file size. | ||
| """ | ||
| # Use John Hunter's birthday as random seed for reproducibility | ||
| rng = np.random.default_rng(19680801) | ||
|
|
||
| n_points = 1000 | ||
| x = rng.random(n_points) * 10 | ||
| y = rng.random(n_points) * 10 | ||
| c = rng.random(n_points) | ||
|
|
||
| # Test 1: Scatter with per-point colors, all points OFF-AXIS | ||
| fig1, ax1 = plt.subplots() | ||
| ax1.scatter(x, y, c=c) | ||
| ax1.set_xlim(20, 30) # Move view completely away from data (x is 0-10) | ||
| ax1.set_ylim(20, 30) # Move view completely away from data (y is 0-10) | ||
|
|
||
| buf1 = io.BytesIO() | ||
| fig1.savefig(buf1, format='pdf') | ||
| size_offaxis_colored = buf1.tell() | ||
| plt.close(fig1) | ||
|
|
||
| # Test 2: Empty scatter (baseline - smallest possible) | ||
| fig2, ax2 = plt.subplots() | ||
| ax2.set_xlim(20, 30) | ||
| ax2.set_ylim(20, 30) | ||
|
|
||
| buf2 = io.BytesIO() | ||
| fig2.savefig(buf2, format='pdf') | ||
| size_empty = buf2.tell() | ||
| plt.close(fig2) | ||
|
|
||
| # The off-axis colored scatter should be close to empty size | ||
| # Allow up to 50KB overhead for axes/metadata, but should be much smaller | ||
| # than if all 1000 markers were written (which would add ~200-400KB) | ||
| assert size_offaxis_colored < size_empty + 50_000, ( | ||
|
||
| f"Off-axis colored scatter PDF ({size_offaxis_colored} bytes) is too large. " | ||
| f"Expected close to empty figure size ({size_empty} bytes). " | ||
| f"Markers may not be properly skipped." | ||
| ) | ||
|
|
||
|
|
||
| @check_figures_equal(extensions=["pdf"]) | ||
| def test_scatter_offaxis_colored_visual(fig_test, fig_ref): | ||
| """ | ||
| Test that on-axis scatter with per-point colors still renders correctly. | ||
|
|
||
| Ensures the optimization for off-axis markers doesn't break normal | ||
| scatter rendering. | ||
| """ | ||
| rng = np.random.default_rng(19680801) | ||
|
|
||
| n_points = 100 | ||
| x = rng.random(n_points) * 5 | ||
| y = rng.random(n_points) * 5 | ||
| c = rng.random(n_points) | ||
|
|
||
| # Test figure: scatter with clipping optimization | ||
| ax_test = fig_test.subplots() | ||
| ax_test.scatter(x, y, c=c, s=50) | ||
| ax_test.set_xlim(0, 10) | ||
| ax_test.set_ylim(0, 10) | ||
|
|
||
| # Reference figure: should look identical | ||
| ax_ref = fig_ref.subplots() | ||
| ax_ref.scatter(x, y, c=c, s=50) | ||
| ax_ref.set_xlim(0, 10) | ||
| ax_ref.set_ylim(0, 10) | ||
|
|
||
|
|
||
| @check_figures_equal(extensions=["pdf"]) | ||
| def test_scatter_mixed_onoff_axis(fig_test, fig_ref): | ||
| """ | ||
| Test scatter with some points on-axis and some off-axis. | ||
|
|
||
| Ensures the optimization correctly handles the common case where only | ||
| some markers are outside the visible area. | ||
| """ | ||
| rng = np.random.default_rng(19680801) | ||
|
|
||
| # Create points: half on-axis (0-5), half off-axis (15-20) | ||
| n_points = 50 | ||
| x_on = rng.random(n_points) * 5 | ||
| y_on = rng.random(n_points) * 5 | ||
| x_off = rng.random(n_points) * 5 + 15 | ||
| y_off = rng.random(n_points) * 5 + 15 | ||
|
|
||
| x = np.concatenate([x_on, x_off]) | ||
| y = np.concatenate([y_on, y_off]) | ||
| c = rng.random(2 * n_points) | ||
|
|
||
| # Test figure: scatter with mixed points | ||
| ax_test = fig_test.subplots() | ||
| ax_test.scatter(x, y, c=c, s=50) | ||
| ax_test.set_xlim(0, 10) | ||
| ax_test.set_ylim(0, 10) | ||
|
|
||
| # Reference figure: only the on-axis points should be visible | ||
| ax_ref = fig_ref.subplots() | ||
| ax_ref.scatter(x_on, y_on, c=c[:n_points], s=50) | ||
| ax_ref.set_xlim(0, 10) | ||
| ax_ref.set_ylim(0, 10) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you test that this works as expected with log scale and polar?