Skip to content

Commit dde6009

Browse files
committed
fix: handle NumPy 2.x binning errors in chi_square
1 parent 5241b5b commit dde6009

1 file changed

Lines changed: 22 additions & 4 deletions

File tree

src/ydata_profiling/model/summary_algorithms.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,18 +102,36 @@ def histogram_compute(
102102
stats[name] = hist
103103
return stats
104104

105-
106-
107105
def _single_bin_edges(values: np.ndarray) -> Optional[np.ndarray]:
    """Return the edges of one bin spanning the finite entries of ``values``.

    Returns ``None`` when ``values`` contains no finite entry.
    """
    finite = values[np.isfinite(values)]
    if finite.size == 0:
        return None

    vmin = float(np.min(finite))
    vmax = float(np.max(finite))
    if vmin == vmax:
        # Degenerate range: widen it so the bin has a non-zero width.
        eps = 0.5 if vmin == 0 else abs(vmin) * 0.1
        return np.array([vmin - eps, vmin + eps])
    return np.array([vmin, vmax])


def chi_square(
    values: Optional[np.ndarray] = None, histogram: Optional[np.ndarray] = None
) -> dict:
    """Run ``chisquare`` on a histogram and return its result as a dict.

    Args:
        values: Raw observations. Only used when ``histogram`` is None, in
            which case they are binned with NumPy's ``"auto"`` strategy.
            Any array-like accepted by ``np.asarray`` works.
        histogram: Precomputed bin counts. When given, ``values`` is ignored.

    Returns:
        The fields of the ``chisquare`` result as a dict. When the histogram
        is empty or sums to zero, or when binning fails and ``values`` holds
        no finite entry, ``{"statistic": 0, "pvalue": 0}`` is returned.

    Raises:
        ValueError: Any binning error other than the "too many bins" failure
            handled below is re-raised unchanged.
    """
    if histogram is None:
        # Coerce once so the fallback's boolean-mask indexing also works for
        # plain sequences, matching what the NumPy binning calls accept.
        values = np.asarray(values)
        try:
            bins = np.histogram_bin_edges(values, bins="auto")
        except ValueError as exc:
            # NumPy 2.x strict binning error; anything else is not ours to hide.
            message = str(exc)
            if (
                "Too many bins for data range" not in message
                and "Cannot create" not in message
            ):
                raise
            # Fallback: a single bin covering the full finite range.
            # NOTE(review): a one-bin histogram leaves the test with zero
            # degrees of freedom - confirm callers cope with that p-value.
            bins = _single_bin_edges(values)
            if bins is None:
                return {"statistic": 0, "pvalue": 0}

        histogram, _ = np.histogram(values, bins=bins)

    if len(histogram) == 0 or np.sum(histogram) == 0:
        return {"statistic": 0, "pvalue": 0}

    return dict(chisquare(histogram)._asdict())
117135

118136
def series_hashable(
119137
fn: Callable[[Settings, pd.Series, dict], Tuple[Settings, pd.Series, dict]]

0 commit comments

Comments
 (0)