Skip to content

Commit dbfcf11

Browse files
Pkcha
authored and committed
feat: initial release
1 parent 7775fd0 commit dbfcf11

8 files changed

Lines changed: 373 additions & 435 deletions

File tree

src/ydata_profiling/config.py

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -407,15 +407,8 @@ class SparkSettings(Settings):
407407
samples.random = 0
408408

409409

410-
class _Config:
411-
"""Container for configuration presets and shorthand mappings.
412-
413-
This class provides predefined configuration groups (sensitive, explorative, themes)
414-
and shorthand mappings for common configuration options. It should be used only
415-
through its static methods.
416-
"""
417-
418-
arg_groups = {
410+
class Config:
411+
arg_groups: Dict[str, Any] = {
419412
"sensitive": {
420413
"samples": None,
421414
"duplicates": None,
@@ -482,43 +475,22 @@ class _Config:
482475

483476
@staticmethod
484477
def get_arg_groups(key: str) -> dict:
485-
"""Get expanded configuration for a preset group.
486-
487-
Args:
488-
key: Name of preset group (e.g., "sensitive", "explorative")
489-
490-
Returns:
491-
Expanded configuration dictionary with shorthands resolved
492-
"""
493-
kwargs = _Config.arg_groups[key]
494-
shorthand_args, _ = _Config.shorthands(kwargs, split=False)
478+
kwargs = Config.arg_groups[key]
479+
shorthand_args, _ = Config.shorthands(kwargs, split=False)
495480
return shorthand_args
496481

497482
@staticmethod
498483
def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
499-
"""Expand shorthand configuration keys.
500-
501-
Args:
502-
kwargs: Configuration dictionary potentially containing shorthands
503-
split: If True, remove shorthands from kwargs and return separately.
504-
If False, expand shorthands in-place within kwargs.
505-
506-
Returns:
507-
Tuple of (shorthand_args, remaining_kwargs)
508-
"""
509484
shorthand_args = {}
510485
if not split:
511486
shorthand_args = kwargs
512487
for key, value in list(kwargs.items()):
513-
if value is None and key in _Config._shorthands:
514-
shorthand_args[key] = _Config._shorthands[key]
488+
if value is None and key in Config._shorthands:
489+
shorthand_args[key] = Config._shorthands[key]
515490
if split:
516491
del kwargs[key]
517492

518493
if split:
519494
return shorthand_args, kwargs
520495
else:
521496
return shorthand_args, {}
522-
523-
524-
Config = _Config

src/ydata_profiling/model/alerts.py

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,10 @@
99

1010
from ydata_profiling.config import Settings
1111
from ydata_profiling.model.correlations import perform_check_correlation
12+
from ydata_profiling.utils.formatters import fmt_percent
1213
from ydata_profiling.utils.styles import get_alert_styles
1314

1415

15-
def fmt_percent(value: float, edge_cases: bool = True) -> str:
16-
"""Format a ratio as a percentage.
17-
18-
Args:
19-
edge_cases: Check for edge cases?
20-
value: The ratio.
21-
22-
Returns:
23-
The percentage with 1 point precision.
24-
"""
25-
if edge_cases and round(value, 3) == 0 and value > 0:
26-
return "< 0.1%"
27-
if edge_cases and round(value, 3) == 1 and value < 1:
28-
return "> 99.9%"
29-
30-
return f"{value*100:2.1f}%"
31-
32-
3316
@unique
3417
class AlertType(Enum):
3518
"""Alert types"""

src/ydata_profiling/model/describe.py

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -27,37 +27,6 @@
2727
from ydata_profiling.version import __version__
2828

2929

30-
def _validate_inputs(
31-
config: Settings, df: Union[pd.DataFrame, "pyspark.sql.DataFrame"] # type: ignore[name-defined] # noqa: F821
32-
) -> None:
33-
"""Validate input types for profiling.
34-
35-
Args:
36-
config: Report configuration settings
37-
df: DataFrame to profile
38-
39-
Raises:
40-
TypeError: If inputs are of incorrect type
41-
"""
42-
if not isinstance(config, Settings):
43-
raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
44-
45-
if isinstance(df, pd.DataFrame):
46-
return
47-
48-
try:
49-
from pyspark.sql import DataFrame as SparkDataFrame
50-
if isinstance(df, SparkDataFrame):
51-
return
52-
except ImportError:
53-
pass
54-
55-
raise TypeError(
56-
f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
57-
f"If using Spark, make sure PySpark is installed."
58-
)
59-
60-
6130
def describe(
6231
config: Settings,
6332
df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore[name-defined] # noqa: F821
@@ -83,7 +52,26 @@ def describe(
8352
- alerts: direct special attention to these patterns in your data.
8453
- package: package details.
8554
"""
86-
_validate_inputs(config, df)
55+
# ** Validate Input types **
56+
if not isinstance(config, Settings):
57+
raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
58+
59+
# Validate df input type
60+
61+
if not isinstance(df, pd.DataFrame):
62+
try:
63+
from pyspark.sql import DataFrame as SparkDataFrame # type: ignore
64+
65+
if not isinstance(df, SparkDataFrame): # noqa: TC301
66+
raise TypeError( # noqa: TC301
67+
f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
68+
)
69+
except ImportError as ex:
70+
raise TypeError(
71+
f"`df must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
72+
f"If using Spark, make sure PySpark is installed."
73+
) from ex
74+
8775
df = preprocess(config, df)
8876

8977
number_of_tasks = 5

src/ydata_profiling/model/handler.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ def composed_function(*args) -> List[Any]:
2525

2626

2727
class Handler:
28-
"""Generic handler for data type specific processing pipelines.
28+
"""A generic handler
2929
30-
Builds a processing pipeline for each data type by composing functions
31-
along the type hierarchy. Allows custom summarization strategies.
30+
Allows any custom mapping between data types and functions
3231
"""
3332

3433
def __init__(
@@ -43,11 +42,6 @@ def __init__(
4342
self._complete_dag()
4443

4544
def _complete_dag(self) -> None:
46-
"""Propagate functions along the type hierarchy DAG.
47-
48-
Functions defined for parent types are inherited by subtypes,
49-
creating a complete processing pipeline for each type.
50-
"""
5145
for from_type, to_type in nx.topological_sort(
5246
nx.line_graph(self.typeset.base_graph)
5347
):
@@ -56,15 +50,9 @@ def _complete_dag(self) -> None:
5650
)
5751

5852
def handle(self, dtype: str, *args, **kwargs) -> dict:
59-
"""Execute the processing pipeline for a given data type.
60-
61-
Args:
62-
dtype: Name of the data type to process
63-
*args: Arguments passed to the processing pipeline
64-
**kwargs: Additional keyword arguments
65-
53+
"""
6654
Returns:
67-
Extracted summary dictionary
55+
object: a tuple containing the config, the dataset series and the summary extracted
6856
"""
6957
funcs = self.mapping.get(dtype, [])
7058
op = compose(funcs)

0 commit comments

Comments
 (0)