xarray-contrib · brendancol · Jun 4, 2026 · Jun 4, 2026
diff --git a/.claude/sweep-performance-state.csv b/.claude/sweep-performance-state.csv
@@ -22,6 +22,7 @@ geotiff,2026-05-20,SAFE,IO-bound,0,2212,"Pass 13 (2026-05-20): 1 MEDIUM found an
 glcm,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,"Downgraded to MEDIUM. da.stack without rechunk is scheduling overhead, not OOM risk."
 hillshade,2026-04-16T12:00:00Z,SAFE,compute-bound,0,,"Re-audit after Horn's method rewrite (PR 1175): clean stencil, map_overlap depth=(1,1), no materialization. Zero findings."
 hydro,2026-05-01,RISKY,memory-bound,0,1416,"Fixed-in-tree 2026-05-01: hand_mfd._hand_mfd_dask now assembles via da.map_blocks instead of eager da.block of pre-computed tiles (matches hand_dinf pattern). Remaining MEDIUM: sink_d8 CCL fully materializes labels (inherently global), flow_accumulation_mfd frac_bdry held in driver dict instead of memmap-backed BoundaryStore. D8 iterative paths (flow_accum/fill/watershed/basin/stream_*) use serial-tile sweep with memmap-backed boundary store -- per-tile RAM bounded but driver iterates O(diameter) times. flow_direction_*, flow_path/snap_pour_point/twi/hand_d8/hand_dinf are SAFE."
+interpolate-kriging,2026-06-04,SAFE,graph-bound,0,2923,"MEDIUM: memory guard used full-grid k0 term on dask templates -> spurious MemoryError (issue #2923, fixed). LOW: _experimental_variogram nlags python loop vectorizable via bincount (~1.2x, pair-array materialization dominates) - doc only. Dask graph clean (2 tasks/chunk); cupy returns device arrays; no .values/.compute/.data.get materialization."
 kde,2026-04-14T12:00:00Z,SAFE,compute-bound,0,,Graph construction serialized per-tile. _filter_points_to_tile scans all points per tile. No HIGH findings.
 mahalanobis,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,False positive. Numpy path materializes by design. Dask path uses lazy reductions + map_blocks.
 morphology,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,

diff --git a/xrspatial/interpolate/_kriging.py b/xrspatial/interpolate/_kriging.py
@@ -336,7 +336,7 @@ def _chunk_var(block, block_info=None):
 # Public API
 # ---------------------------------------------------------------------------
 
-def _check_kriging_memory(n_points, grid_pixels):
+def _check_kriging_memory(n_points, grid_pixels, is_dask=False):
     """Raise MemoryError if kriging() would exceed available memory.
 
     Three allocations dominate kriging memory use:
@@ -355,13 +355,20 @@ def _check_kriging_memory(n_points, grid_pixels):
     Worst case is the maximum of these three.  The variogram and matrix
     builds run sequentially, and ``k0`` is built later, so peak usage
     is bounded by the largest single allocation.
+
+    When ``is_dask`` is True the prediction ``k0`` matrix is built one
+    chunk at a time by ``map_blocks``, so its peak size scales with the
+    chunk rather than ``grid_pixels``.  ``grid_pixels`` is not a valid
+    bound for that path, so the ``k0`` term is dropped.  The variogram
+    and matrix terms are point-based and materialised on the host
+    regardless of backend, so they still apply.
     """
     n = int(n_points)
     g = int(grid_pixels)
 
     pair_bytes = 4 * (n * (n - 1) // 2) * 8 if n > 1 else 0
     matrix_bytes = 3 * (n + 1) * (n + 1) * 8
-    k0_bytes = 3 * g * (n + 1) * 8
+    k0_bytes = 0 if is_dask else 3 * g * (n + 1) * 8
 
     estimate = max(pair_bytes, matrix_bytes, k0_bytes)
 
@@ -446,7 +453,8 @@ def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
     # Memory guard.  Runs after input validation so we know N and the
     # template grid size, but before any large allocation.
     grid_pixels = int(np.prod(template.shape))
-    _check_kriging_memory(len(x_arr), grid_pixels)
+    is_dask = da is not None and isinstance(template.data, da.Array)
+    _check_kriging_memory(len(x_arr), grid_pixels, is_dask=is_dask)
 
     # Experimental variogram
     lag_h, lag_sv = _experimental_variogram(x_arr, y_arr, z_arr, nlags)

diff --git a/xrspatial/tests/test_interpolation.py b/xrspatial/tests/test_interpolation.py
@@ -487,6 +487,59 @@ def test_check_helper_estimate_message(self, monkeypatch):
         with pytest.raises(MemoryError, match='prediction matrix'):
             _check_kriging_memory(n_points=10, grid_pixels=100_000)
 
+    def test_check_helper_dask_skips_k0_term(self):
+        """is_dask=True drops the grid-sized k0 term from the estimate."""
+        from xrspatial.interpolate._kriging import _check_kriging_memory
+
+        # Same n and grid_pixels as the message test above, which trips
+        # the guard at 1 MB available.  With is_dask=True the k0 term is
+        # dropped, so the tiny point-based terms stay well under 1 MB and
+        # nothing is raised.
+        _check_kriging_memory(n_points=10, grid_pixels=100_000,
+                              is_dask=True)
+
+    def test_check_helper_dask_still_guards_matrix(self, monkeypatch):
+        """is_dask=True keeps the point-based matrix guard."""
+        from xrspatial.interpolate._kriging import _check_kriging_memory
+
+        monkeypatch.setattr(
+            'xrspatial.zonal._available_memory_bytes',
+            lambda: 32 * 1024 ** 2,
+        )
+
+        # n=3000 -> matrix_bytes ~ 216 MB regardless of backend.
+        with pytest.raises(MemoryError, match='kriging matrix'):
+            _check_kriging_memory(n_points=3000, grid_pixels=4,
+                                  is_dask=True)
+
+    @dask_array_available
+    def test_dask_template_skips_grid_memory_guard(self, monkeypatch):
+        """A large chunked dask template is not rejected by the guard.
+
+        The dask backend builds the prediction matrix per chunk, so the
+        full-grid k0 estimate must not gate it (issue #2923).
+        """
+        monkeypatch.setattr(
+            'xrspatial.zonal._available_memory_bytes',
+            lambda: 64 * 1024 ** 2,
+        )
+
+        rng = np.random.RandomState(0)
+        x = rng.uniform(0, 100, 20)
+        y = rng.uniform(0, 100, 20)
+        z = 2.0 * x + 3.0 * y + rng.normal(0, 0.1, 20)
+
+        # Full-grid k0 would be ~8 GB, far over 64 MB, but each
+        # 200x200 chunk only needs ~20 MB.
+        template = _make_template(
+            np.linspace(0, 100, 4000),
+            np.linspace(0, 100, 4000),
+            backend='dask', chunks=(200, 200),
+        )
+
+        result = kriging(x, y, z, template)
+        assert result.shape == template.shape
+
 
 class TestIDWMemoryGuard:
     """Verify idw(k=...) refuses to allocate more than ~80% of RAM.