
Commit 7775fd0

Pkcha authored and committed
feat: initial release
1 parent 82479e9 commit 7775fd0

6 files changed

Lines changed: 237 additions & 134 deletions


src/ydata_profiling/config.py

Lines changed: 34 additions & 6 deletions
@@ -407,8 +407,15 @@ class SparkSettings(Settings):
     samples.random = 0


-class Config:
-    arg_groups: Dict[str, Any] = {
+class _Config:
+    """Container for configuration presets and shorthand mappings.
+
+    This class provides predefined configuration groups (sensitive, explorative, themes)
+    and shorthand mappings for common configuration options. It should be used only
+    through its static methods.
+    """
+
+    arg_groups = {
         "sensitive": {
             "samples": None,
             "duplicates": None,
@@ -475,22 +482,43 @@ class Config:

     @staticmethod
     def get_arg_groups(key: str) -> dict:
-        kwargs = Config.arg_groups[key]
-        shorthand_args, _ = Config.shorthands(kwargs, split=False)
+        """Get expanded configuration for a preset group.
+
+        Args:
+            key: Name of preset group (e.g., "sensitive", "explorative")
+
+        Returns:
+            Expanded configuration dictionary with shorthands resolved
+        """
+        kwargs = _Config.arg_groups[key]
+        shorthand_args, _ = _Config.shorthands(kwargs, split=False)
         return shorthand_args

     @staticmethod
     def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
+        """Expand shorthand configuration keys.
+
+        Args:
+            kwargs: Configuration dictionary potentially containing shorthands
+            split: If True, remove shorthands from kwargs and return separately.
+                If False, expand shorthands in-place within kwargs.
+
+        Returns:
+            Tuple of (shorthand_args, remaining_kwargs)
+        """
         shorthand_args = {}
         if not split:
             shorthand_args = kwargs
         for key, value in list(kwargs.items()):
-            if value is None and key in Config._shorthands:
-                shorthand_args[key] = Config._shorthands[key]
+            if value is None and key in _Config._shorthands:
+                shorthand_args[key] = _Config._shorthands[key]
                 if split:
                     del kwargs[key]

         if split:
             return shorthand_args, kwargs
         else:
             return shorthand_args, {}
+
+
+Config = _Config
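Editor's note: for orientation, a minimal usage sketch of the renamed helpers, assuming a ydata-profiling install that includes this commit; the exact expanded values depend on the library's _shorthands table, which this diff does not show.

    from ydata_profiling.config import Config  # still resolves, via the _Config alias above

    # Expand a preset group into concrete configuration overrides.
    explorative_overrides = Config.get_arg_groups("explorative")

    # Split user-supplied kwargs into shorthand expansions and everything else.
    user_kwargs = {"samples": None, "title": "My report"}
    shorthand_args, remaining = Config.shorthands(user_kwargs)

    print(sorted(shorthand_args))  # keys that were expanded from shorthands
    print(remaining)               # non-shorthand entries are left in place

The `Config = _Config` assignment at the end of the file keeps the old public name importable, so existing callers are unaffected by the rename.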

src/ydata_profiling/model/describe.py

Lines changed: 32 additions & 20 deletions
@@ -27,6 +27,37 @@
 from ydata_profiling.version import __version__


+def _validate_inputs(
+    config: Settings, df: Union[pd.DataFrame, "pyspark.sql.DataFrame"] # type: ignore[name-defined] # noqa: F821
+) -> None:
+    """Validate input types for profiling.
+
+    Args:
+        config: Report configuration settings
+        df: DataFrame to profile
+
+    Raises:
+        TypeError: If inputs are of incorrect type
+    """
+    if not isinstance(config, Settings):
+        raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
+
+    if isinstance(df, pd.DataFrame):
+        return
+
+    try:
+        from pyspark.sql import DataFrame as SparkDataFrame
+        if isinstance(df, SparkDataFrame):
+            return
+    except ImportError:
+        pass
+
+    raise TypeError(
+        f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
+        f"If using Spark, make sure PySpark is installed."
+    )
+
+
 def describe(
     config: Settings,
     df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore[name-defined] # noqa: F821
@@ -52,26 +83,7 @@ def describe(
         - alerts: direct special attention to these patterns in your data.
         - package: package details.
     """
-    # ** Validate Input types **
-    if not isinstance(config, Settings):
-        raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
-
-    # Validate df input type
-
-    if not isinstance(df, pd.DataFrame):
-        try:
-            from pyspark.sql import DataFrame as SparkDataFrame  # type: ignore
-
-            if not isinstance(df, SparkDataFrame):  # noqa: TC301
-                raise TypeError(  # noqa: TC301
-                    f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
-                )
-        except ImportError as ex:
-            raise TypeError(
-                f"`df must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
-                f"If using Spark, make sure PySpark is installed."
-            ) from ex
-
+    _validate_inputs(config, df)
     df = preprocess(config, df)

     number_of_tasks = 5
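Editor's note: a small hedged check of the new helper's contract, assuming a checkout that includes this commit plus pandas; `_validate_inputs` is a private function, so this is a sketch of its behaviour rather than a recommended call site.

    import pandas as pd

    from ydata_profiling.config import Settings
    from ydata_profiling.model.describe import _validate_inputs

    settings = Settings()

    # A pandas DataFrame is accepted without ever importing PySpark.
    _validate_inputs(settings, pd.DataFrame({"a": [1, 2, 3]}))

    # Anything else raises a single TypeError, whether PySpark is missing
    # or the object is simply not a supported DataFrame type.
    try:
        _validate_inputs(settings, {"a": [1, 2, 3]})
    except TypeError as exc:
        print(exc)

Compared with the removed inline checks, the helper collapses the missing-PySpark case and the wrong-type case into one error path instead of chaining the TypeError from the ImportError.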

src/ydata_profiling/model/handler.py

Lines changed: 16 additions & 25 deletions
@@ -25,9 +25,10 @@ def composed_function(*args) -> List[Any]:


 class Handler:
-    """A generic handler
+    """Generic handler for data type specific processing pipelines.

-    Allows any custom mapping between data types and functions
+    Builds a processing pipeline for each data type by composing functions
+    along the type hierarchy. Allows custom summarization strategies.
     """

     def __init__(
@@ -42,6 +43,11 @@ def __init__(
         self._complete_dag()

     def _complete_dag(self) -> None:
+        """Propagate functions along the type hierarchy DAG.
+
+        Functions defined for parent types are inherited by subtypes,
+        creating a complete processing pipeline for each type.
+        """
         for from_type, to_type in nx.topological_sort(
             nx.line_graph(self.typeset.base_graph)
         ):
@@ -50,32 +56,17 @@ def _complete_dag(self) -> None:
             )

     def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
+        """Execute the processing pipeline for a given data type.
+
+        Args:
+            dtype: Name of the data type to process
+            *args: Arguments passed to the processing pipeline
+            **kwargs: Additional keyword arguments
+
         Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
+            Extracted summary dictionary
         """
         funcs = self.mapping.get(dtype, [])
         op = compose(funcs)
         summary = op(*args)[-1]
         return summary
-
-
-def get_render_map() -> Dict[str, Callable]:
-    import ydata_profiling.report.structure.variables as render_algorithms
-
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
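Editor's note: a toy, self-contained illustration of the pipeline mechanism documented above: each data type maps to a list of summarization functions, the list is composed so every step passes its output tuple to the next, and handle() returns the last element as the summary. The type names come from the removed render map, but the functions and mapping below are invented for the example; the real mapping is built from the visions typeset DAG.

    from typing import Any, Callable, Dict, List, Sequence


    def compose(functions: Sequence[Callable]) -> Callable:
        # Chain functions so each receives the previous one's output tuple.
        def composed(*args: Any) -> tuple:
            for func in functions:
                args = func(*args)
            return args
        return composed


    def describe_generic(config: dict, series: list, summary: dict) -> tuple:
        summary["n"] = len(series)  # a step every subtype inherits from its ancestor
        return config, series, summary


    def describe_numeric(config: dict, series: list, summary: dict) -> tuple:
        summary["mean"] = sum(series) / len(series)
        return config, series, summary


    # After _complete_dag, a subtype's pipeline contains its ancestors' steps first.
    mapping: Dict[str, List[Callable]] = {
        "Unsupported": [describe_generic],
        "Numeric": [describe_generic, describe_numeric],
    }


    def handle(dtype: str, *args: Any) -> dict:
        funcs = mapping.get(dtype, [])
        return compose(funcs)(*args)[-1]  # the summary is the last tuple element


    print(handle("Numeric", {}, [1.0, 2.0, 3.0], {}))  # {'n': 3, 'mean': 2.0}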
