@@ -25,6 +25,35 @@ def inner(
2525
2626 return inner
2727
def safe_histogram(
    values: np.ndarray,
    bins: Union[int, str, np.ndarray] = "auto",
    weights: Optional[np.ndarray] = None,
    density: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute a histogram, falling back when NumPy cannot build the bins.

    Wrapper around ``np.histogram`` that avoids
    ``ValueError: Too many bins for data range. Cannot create N finite-sized bins.``

    Fallback order when that specific error is raised:

    1. Retry with ``bins="auto"`` (unweighted data only).
    2. Use a single bin spanning the finite values; if all finite values are
       identical, the bin is widened symmetrically around that value.

    Args:
        values: Data to bin.
        bins: Bin count, bin-estimator name, or explicit bin edges, passed to
            ``np.histogram``.
        weights: Optional per-value weights, passed to ``np.histogram``.
        density: Passed to ``np.histogram``.

    Returns:
        A ``(counts, bin_edges)`` tuple. Two empty arrays are returned when
        the fallback finds no finite values.

    Raises:
        ValueError: Any ``ValueError`` from the first ``np.histogram`` call
            whose message is not the "Too many bins for data range" error.
    """
    try:
        return np.histogram(values, bins=bins, weights=weights, density=density)
    except ValueError as exc:
        if "Too many bins for data range" not in str(exc):
            raise

    # np.histogram raises TypeError for string bin estimators when weights are
    # given, so the "auto" retry is only attempted for unweighted data.
    if weights is None:
        try:
            return np.histogram(values, bins="auto", density=density)
        except ValueError:
            pass  # fall through to the single-bin fallback below

    # Last resort: one bin covering the finite part of the data.
    values = np.asarray(values)
    finite = values[np.isfinite(values)]
    if finite.size == 0:
        return np.array([]), np.array([])

    vmin = float(np.min(finite))
    vmax = float(np.max(finite))
    if vmin == vmax:
        # Degenerate range: widen relative to the magnitude so the two edges
        # remain distinct even for very large values.
        eps = 0.5 if vmin == 0 else abs(vmin) * 0.5
        bin_edges = np.array([vmin - eps, vmin + eps])
    else:
        bin_edges = np.array([vmin, vmax])
    return np.histogram(values, bins=bin_edges, weights=weights, density=density)
2857
2958def histogram_compute (
3059 config : Settings ,
@@ -38,33 +67,43 @@ def histogram_compute(
3867 return {name : []}
3968
4069 hist_config = config .plot .histogram
41- bins_arg = "auto" if hist_config .bins == 0 else min (hist_config .bins , n_unique )
4270
43- def _safe_histogram_bin_edges (values : np .ndarray , bins_param : Union [int , str ]) -> np .ndarray :
44- try :
45- return np .histogram_bin_edges (values , bins = bins_param )
46- except ValueError as exc :
47- if "Too many bins for data range" in str (exc ):
48- # fallback: auto selection
49- return np .histogram_bin_edges (values , bins = "auto" )
50- raise
71+ # Compute data range
72+ finite = finite_values [np .isfinite (finite_values )]
73+ vmin = float (np .min (finite ))
74+ vmax = float (np .max (finite ))
75+ data_range = vmax - vmin
76+
77+ # Choose the bins based on the observed data values
78+ if data_range == 0 :
79+ eps = 0.5 if vmin == 0 else abs (vmin ) * 0.1
80+ bins = np .array ([vmin - eps , vmin + eps ])
81+ else :
82+ requested_bins = hist_config .bins if hist_config .bins > 0 else "auto"
83+
84+ if isinstance (requested_bins , int ):
85+ safe_bins = min (requested_bins , n_unique , hist_config .max_bins )
5186
52- bins = _safe_histogram_bin_edges ( finite_values , bins_arg )
87+ safe_bins = max ( 1 , safe_bins )
5388
54- if len (bins ) > hist_config .max_bins :
55- bins = _safe_histogram_bin_edges (finite_values , hist_config .max_bins )
56- if weights is not None and len (weights ) != len (bins ):
57- weights = None
89+ bins = np .linspace (vmin , vmax , safe_bins + 1 )
90+ else :
91+ bins = np .histogram_bin_edges (finite_values , bins = "auto" )
92+ if len (bins ) - 1 > hist_config .max_bins :
93+ bins = np .linspace (vmin , vmax , hist_config .max_bins + 1 )
5894
59- stats [ name ] = np .histogram (
95+ hist = np .histogram (
6096 finite_values ,
6197 bins = bins ,
6298 weights = weights ,
63- density = config . plot . histogram .density ,
99+ density = hist_config .density ,
64100 )
101+
102+ stats [name ] = hist
65103 return stats
66104
67105
106+
68107def chi_square (
69108 values : Optional [np .ndarray ] = None , histogram : Optional [np .ndarray ] = None
70109) -> dict :
0 commit comments