Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/sweep-performance-state.csv
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ geotiff,2026-05-20,SAFE,IO-bound,0,2212,"Pass 13 (2026-05-20): 1 MEDIUM found an
glcm,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,"Downgraded to MEDIUM. da.stack without rechunk is scheduling overhead, not OOM risk."
hillshade,2026-04-16T12:00:00Z,SAFE,compute-bound,0,,"Re-audit after Horn's method rewrite (PR 1175): clean stencil, map_overlap depth=(1,1), no materialization. Zero findings."
hydro,2026-05-01,RISKY,memory-bound,0,1416,"Fixed-in-tree 2026-05-01: hand_mfd._hand_mfd_dask now assembles via da.map_blocks instead of eager da.block of pre-computed tiles (matches hand_dinf pattern). Remaining MEDIUM: sink_d8 CCL fully materializes labels (inherently global), flow_accumulation_mfd frac_bdry held in driver dict instead of memmap-backed BoundaryStore. D8 iterative paths (flow_accum/fill/watershed/basin/stream_*) use serial-tile sweep with memmap-backed boundary store -- per-tile RAM bounded but driver iterates O(diameter) times. flow_direction_*, flow_path/snap_pour_point/twi/hand_d8/hand_dinf are SAFE."
interpolate-kriging,2026-06-04,SAFE,graph-bound,0,2923,"MEDIUM: memory guard used full-grid k0 term on dask templates -> spurious MemoryError (issue #2923, fixed). LOW: _experimental_variogram nlags python loop vectorizable via bincount (~1.2x, pair-array materialization dominates) - doc only. Dask graph clean (2 tasks/chunk); cupy returns device arrays; no .values/.compute/.data.get materialization."
kde,2026-04-14T12:00:00Z,SAFE,compute-bound,0,,Graph construction serialized per-tile. _filter_points_to_tile scans all points per tile. No HIGH findings.
mahalanobis,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,False positive. Numpy path materializes by design. Dask path uses lazy reductions + map_blocks.
morphology,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,
Expand Down
14 changes: 11 additions & 3 deletions xrspatial/interpolate/_kriging.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def _chunk_var(block, block_info=None):
# Public API
# ---------------------------------------------------------------------------

def _check_kriging_memory(n_points, grid_pixels):
def _check_kriging_memory(n_points, grid_pixels, is_dask=False):
"""Raise MemoryError if kriging() would exceed available memory.

Three allocations dominate kriging memory use:
Expand All @@ -355,13 +355,20 @@ def _check_kriging_memory(n_points, grid_pixels):
Worst case is the maximum of these three. The variogram and matrix
builds run sequentially, and ``k0`` is built later, so peak usage
is bounded by the largest single allocation.

When ``is_dask`` is True the prediction ``k0`` matrix is built one
chunk at a time by ``map_blocks``, so its peak size scales with the
chunk rather than ``grid_pixels``. ``grid_pixels`` is not a valid
bound for that path, so the ``k0`` term is dropped. The variogram
and matrix terms are point-based and materialised on the host
regardless of backend, so they still apply.
"""
n = int(n_points)
g = int(grid_pixels)

pair_bytes = 4 * (n * (n - 1) // 2) * 8 if n > 1 else 0
matrix_bytes = 3 * (n + 1) * (n + 1) * 8
k0_bytes = 3 * g * (n + 1) * 8
k0_bytes = 0 if is_dask else 3 * g * (n + 1) * 8

estimate = max(pair_bytes, matrix_bytes, k0_bytes)

Expand Down Expand Up @@ -446,7 +453,8 @@ def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
# Memory guard. Runs after input validation so we know N and the
# template grid size, but before any large allocation.
grid_pixels = int(np.prod(template.shape))
_check_kriging_memory(len(x_arr), grid_pixels)
is_dask = da is not None and isinstance(template.data, da.Array)
_check_kriging_memory(len(x_arr), grid_pixels, is_dask=is_dask)

# Experimental variogram
lag_h, lag_sv = _experimental_variogram(x_arr, y_arr, z_arr, nlags)
Expand Down
53 changes: 53 additions & 0 deletions xrspatial/tests/test_interpolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,59 @@ def test_check_helper_estimate_message(self, monkeypatch):
with pytest.raises(MemoryError, match='prediction matrix'):
_check_kriging_memory(n_points=10, grid_pixels=100_000)

def test_check_helper_dask_skips_k0_term(self):
"""is_dask=True drops the grid-sized k0 term from the estimate."""
from xrspatial.interpolate._kriging import _check_kriging_memory

# Same n and grid_pixels as the message test above, which trips
# the guard at 1 MB available. With is_dask=True the k0 term is
# dropped, so the tiny point-based terms stay well under 1 MB and
# nothing is raised.
_check_kriging_memory(n_points=10, grid_pixels=100_000,
is_dask=True)

def test_check_helper_dask_still_guards_matrix(self, monkeypatch):
"""is_dask=True keeps the point-based matrix guard."""
from xrspatial.interpolate._kriging import _check_kriging_memory

monkeypatch.setattr(
'xrspatial.zonal._available_memory_bytes',
lambda: 32 * 1024 ** 2,
)

# n=3000 -> matrix_bytes ~ 216 MB regardless of backend.
with pytest.raises(MemoryError, match='kriging matrix'):
_check_kriging_memory(n_points=3000, grid_pixels=4,
is_dask=True)

@dask_array_available
def test_dask_template_skips_grid_memory_guard(self, monkeypatch):
"""A large chunked dask template is not rejected by the guard.

The dask backend builds the prediction matrix per chunk, so the
full-grid k0 estimate must not gate it (issue #2923).
"""
monkeypatch.setattr(
'xrspatial.zonal._available_memory_bytes',
lambda: 64 * 1024 ** 2,
)

rng = np.random.RandomState(0)
x = rng.uniform(0, 100, 20)
y = rng.uniform(0, 100, 20)
z = 2.0 * x + 3.0 * y + rng.normal(0, 0.1, 20)

# Full-grid k0 would be ~8 GB, far over 64 MB, but each
# 200x200 chunk only needs ~20 MB.
template = _make_template(
np.linspace(0, 100, 4000),
np.linspace(0, 100, 4000),
backend='dask', chunks=(200, 200),
)

result = kriging(x, y, z, template)
assert result.shape == template.shape


class TestIDWMemoryGuard:
"""Verify idw(k=...) refuses to allocate more than ~80% of RAM.
Expand Down
Loading