Data-Centric-AI-Community · kchpp940 · Apr 12, 2026 · Apr 12, 2026 · Apr 12, 2026 · Apr 12, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.10-slim
+# Image for running Jupyter Notebook with this project installed from the build context.
+WORKDIR /app
+# Install the build-essential toolchain and drop the apt package lists in the same layer.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Copy the whole build context into /app so that `pip install .` below can see the project.
+COPY . .
+# Upgrade pip, pin setuptools, install the project, re-apply the pin (presumably in case the project install changed it), then add Jupyter.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
+    pip install --no-cache-dir . && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
+    pip install --no-cache-dir jupyter
+# Declare the notebook server port; same value as --port in CMD below.
+EXPOSE 8888
+# Start Jupyter on all interfaces without a browser; --allow-root presumably because no USER is set in this file.
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]
+
+
diff --git a/src/ydata_profiling/model/alerts.py b/src/ydata_profiling/model/alerts.py
@@ -12,8 +12,8 @@
 from ydata_profiling.utils.styles import get_alert_styles
 
 
-def fmt_percent(value: float, edge_cases: bool = True) -> str:
-    """Format a ratio as a percentage.
+def _fmt_percent(value: float, edge_cases: bool = True) -> str:
+    """Format a ratio as a percentage (internal copy to avoid circular imports).
 
     Args:
         edge_cases: Check for edge cases?
@@ -209,7 +209,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"Dataset has {self.values['n_duplicates']} ({fmt_percent(self.values['p_duplicates'])}) duplicate rows"
+            return f"Dataset has {self.values['n_duplicates']} ({_fmt_percent(self.values['p_duplicates'])}) duplicate rows"
         else:
             return "Dataset has no duplicated rows"
 
@@ -231,7 +231,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"Dataset has {self.values['n_near_dups']} ({fmt_percent(self.values['p_near_dups'])}) near duplicate rows"
+            return f"Dataset has {self.values['n_near_dups']} ({_fmt_percent(self.values['p_near_dups'])}) near duplicate rows"
         else:
             return "Dataset has no near duplicated rows"
 
@@ -272,7 +272,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_distinct']:} ({fmt_percent(self.values['p_distinct'])}) distinct values"
+            return f"[{self.column_name}] has {self.values['n_distinct']:} ({_fmt_percent(self.values['p_distinct'])}) distinct values"
         else:
             return f"[{self.column_name}] has a high cardinality"
 
@@ -294,7 +294,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_fuzzy_vals']} fuzzy values: {fmt_percent(self.values['p_fuzzy_vals'])} per category"
+            return f"[{self.column_name}] has {self.values['n_fuzzy_vals']} fuzzy values: {_fmt_percent(self.values['p_fuzzy_vals'])} per category"
         else:
             return f"[{self.column_name}] no dirty categories values."
 
@@ -365,7 +365,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_infinite']} ({fmt_percent(self.values['p_infinite'])}) infinite values"
+            return f"[{self.column_name}] has {self.values['n_infinite']} ({_fmt_percent(self.values['p_infinite'])}) infinite values"
         else:
             return f"[{self.column_name}] has infinite values"
 
@@ -387,7 +387,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] {self.values['n_missing']} ({fmt_percent(self.values['p_missing'])}) missing values"
+            return f"[{self.column_name}] {self.values['n_missing']} ({_fmt_percent(self.values['p_missing'])}) missing values"
         else:
             return f"[{self.column_name}] has missing values"
 
@@ -541,7 +541,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_zeros']} ({fmt_percent(self.values['p_zeros'])}) zeros"
+            return f"[{self.column_name}] has {self.values['n_zeros']} ({_fmt_percent(self.values['p_zeros'])}) zeros"
         else:
             return f"[{self.column_name}] has predominantly zeros"
 

diff --git a/src/ydata_profiling/model/summary_algorithms.py b/src/ydata_profiling/model/summary_algorithms.py
@@ -11,21 +11,6 @@
 T = TypeVar("T")
 
 
-def func_nullable_series_contains(fn: Callable) -> Callable:
-    @functools.wraps(fn)
-    def inner(
-        config: Settings, series: pd.Series, state: dict, *args, **kwargs
-    ) -> bool:
-        if series.hasnans:
-            series = series.dropna()
-            if series.empty:
-                return False
-
-        return fn(config, series, state, *args, **kwargs)
-
-    return inner
-
-
 def safe_histogram(
     values: np.ndarray,
     bins: Union[int, str, np.ndarray] = "auto",

diff --git a/src/ydata_profiling/report/structure/variables/render_common.py b/src/ydata_profiling/report/structure/variables/render_common.py
@@ -10,7 +10,6 @@ def render_common(config: Settings, summary: dict) -> dict:
     n_freq_table_max = config.n_freq_table_max
 
     template_variables = {
-        # TODO: with nan
         "freq_table_rows": freq_table(
             freqtable=summary["value_counts_without_nan"],
             n=summary["n"],