
Commit 7775fd0

Pkcha authored and committed
feat: initial release
1 parent 82479e9 commit 7775fd0

6 files changed

Lines changed: 237 additions & 134 deletions


src/ydata_profiling/config.py

Lines changed: 34 additions & 6 deletions
@@ -407,8 +407,15 @@ class SparkSettings(Settings):
     samples.random = 0


-class Config:
-    arg_groups: Dict[str, Any] = {
+class _Config:
+    """Container for configuration presets and shorthand mappings.
+
+    This class provides predefined configuration groups (sensitive, explorative, themes)
+    and shorthand mappings for common configuration options. It should be used only
+    through its static methods.
+    """
+
+    arg_groups = {
         "sensitive": {
             "samples": None,
             "duplicates": None,
@@ -475,22 +482,43 @@ class Config:

     @staticmethod
     def get_arg_groups(key: str) -> dict:
-        kwargs = Config.arg_groups[key]
-        shorthand_args, _ = Config.shorthands(kwargs, split=False)
+        """Get expanded configuration for a preset group.
+
+        Args:
+            key: Name of preset group (e.g., "sensitive", "explorative")
+
+        Returns:
+            Expanded configuration dictionary with shorthands resolved
+        """
+        kwargs = _Config.arg_groups[key]
+        shorthand_args, _ = _Config.shorthands(kwargs, split=False)
         return shorthand_args

     @staticmethod
     def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
+        """Expand shorthand configuration keys.
+
+        Args:
+            kwargs: Configuration dictionary potentially containing shorthands
+            split: If True, remove shorthands from kwargs and return separately.
+                If False, expand shorthands in-place within kwargs.
+
+        Returns:
+            Tuple of (shorthand_args, remaining_kwargs)
+        """
         shorthand_args = {}
         if not split:
             shorthand_args = kwargs
         for key, value in list(kwargs.items()):
-            if value is None and key in Config._shorthands:
-                shorthand_args[key] = Config._shorthands[key]
+            if value is None and key in _Config._shorthands:
+                shorthand_args[key] = _Config._shorthands[key]
                 if split:
                     del kwargs[key]

         if split:
             return shorthand_args, kwargs
         else:
             return shorthand_args, {}
+
+
+Config = _Config
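Editor's note: for orientation, a minimal usage sketch of the renamed helpers, assuming a ydata-profiling install that includes this commit; the exact expanded values depend on the library's _shorthands table, which this diff does not show.

    from ydata_profiling.config import Config  # still resolves, via the _Config alias above

    # Expand a preset group into concrete configuration overrides.
    explorative_overrides = Config.get_arg_groups("explorative")

    # Split user-supplied kwargs into shorthand expansions and everything else.
    user_kwargs = {"samples": None, "title": "My report"}
    shorthand_args, remaining = Config.shorthands(user_kwargs)

    print(sorted(shorthand_args))  # keys that were expanded from shorthands
    print(remaining)               # non-shorthand entries are left in place

The `Config = _Config` assignment at the end of the file keeps the old public name importable, so existing callers are unaffected by the rename.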

src/ydata_profiling/model/describe.py

Lines changed: 32 additions & 20 deletions
@@ -27,6 +27,37 @@
 from ydata_profiling.version import __version__


+def _validate_inputs(
+    config: Settings, df: Union[pd.DataFrame, "pyspark.sql.DataFrame"] # type: ignore[name-defined] # noqa: F821
+) -> None:
+    """Validate input types for profiling.
+
+    Args:
+        config: Report configuration settings
+        df: DataFrame to profile
+
+    Raises:
+        TypeError: If inputs are of incorrect type
+    """
+    if not isinstance(config, Settings):
+        raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
+
+    if isinstance(df, pd.DataFrame):
+        return
+
+    try:
+        from pyspark.sql import DataFrame as SparkDataFrame
+        if isinstance(df, SparkDataFrame):
+            return
+    except ImportError:
+        pass
+
+    raise TypeError(
+        f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
+        f"If using Spark, make sure PySpark is installed."
+    )
+
+
 def describe(
     config: Settings,
     df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore[name-defined] # noqa: F821
@@ -52,26 +83,7 @@ def describe(
         - alerts: direct special attention to these patterns in your data.
         - package: package details.
     """
-    # ** Validate Input types **
-    if not isinstance(config, Settings):
-        raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
-
-    # Validate df input type
-
-    if not isinstance(df, pd.DataFrame):
-        try:
-            from pyspark.sql import DataFrame as SparkDataFrame  # type: ignore
-
-            if not isinstance(df, SparkDataFrame):  # noqa: TC301
-                raise TypeError(  # noqa: TC301
-                    f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
-                )
-        except ImportError as ex:
-            raise TypeError(
-                f"`df must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
-                f"If using Spark, make sure PySpark is installed."
-            ) from ex
-
+    _validate_inputs(config, df)
     df = preprocess(config, df)

     number_of_tasks = 5
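Editor's note: a small hedged check of the new helper's contract, assuming a checkout that includes this commit plus pandas; `_validate_inputs` is a private function, so this is a sketch of its behaviour rather than a recommended call site.

    import pandas as pd

    from ydata_profiling.config import Settings
    from ydata_profiling.model.describe import _validate_inputs

    settings = Settings()

    # A pandas DataFrame is accepted without ever importing PySpark.
    _validate_inputs(settings, pd.DataFrame({"a": [1, 2, 3]}))

    # Anything else raises a single TypeError, whether PySpark is missing
    # or the object is simply not a supported DataFrame type.
    try:
        _validate_inputs(settings, {"a": [1, 2, 3]})
    except TypeError as exc:
        print(exc)

Compared with the removed inline checks, the helper collapses the missing-PySpark case and the wrong-type case into one error path instead of chaining the TypeError from the ImportError.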

src/ydata_profiling/model/handler.py

Lines changed: 16 additions & 25 deletions
@@ -25,9 +25,10 @@ def composed_function(*args) -> List[Any]:


 class Handler:
-    """A generic handler
+    """Generic handler for data type specific processing pipelines.

-    Allows any custom mapping between data types and functions
+    Builds a processing pipeline for each data type by composing functions
+    along the type hierarchy. Allows custom summarization strategies.
     """

     def __init__(
@@ -42,6 +43,11 @@ def __init__(
         self._complete_dag()

     def _complete_dag(self) -> None:
+        """Propagate functions along the type hierarchy DAG.
+
+        Functions defined for parent types are inherited by subtypes,
+        creating a complete processing pipeline for each type.
+        """
         for from_type, to_type in nx.topological_sort(
             nx.line_graph(self.typeset.base_graph)
         ):
@@ -50,32 +56,17 @@ def _complete_dag(self) -> None:
             )

     def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
+        """Execute the processing pipeline for a given data type.
+
+        Args:
+            dtype: Name of the data type to process
+            *args: Arguments passed to the processing pipeline
+            **kwargs: Additional keyword arguments
+
         Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
+            Extracted summary dictionary
         """
         funcs = self.mapping.get(dtype, [])
         op = compose(funcs)
         summary = op(*args)[-1]
         return summary
-
-
-def get_render_map() -> Dict[str, Callable]:
-    import ydata_profiling.report.structure.variables as render_algorithms
-
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
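Editor's note: a toy, self-contained illustration of the pipeline mechanism documented above: each data type maps to a list of summarization functions, the list is composed so every step passes its output tuple to the next, and handle() returns the last element as the summary. The type names come from the removed render map, but the functions and mapping below are invented for the example; the real mapping is built from the visions typeset DAG.

    from typing import Any, Callable, Dict, List, Sequence


    def compose(functions: Sequence[Callable]) -> Callable:
        # Chain functions so each receives the previous one's output tuple.
        def composed(*args: Any) -> tuple:
            for func in functions:
                args = func(*args)
            return args
        return composed


    def describe_generic(config: dict, series: list, summary: dict) -> tuple:
        summary["n"] = len(series)  # a step every subtype inherits from its ancestor
        return config, series, summary


    def describe_numeric(config: dict, series: list, summary: dict) -> tuple:
        summary["mean"] = sum(series) / len(series)
        return config, series, summary


    # After _complete_dag, a subtype's pipeline contains its ancestors' steps first.
    mapping: Dict[str, List[Callable]] = {
        "Unsupported": [describe_generic],
        "Numeric": [describe_generic, describe_numeric],
    }


    def handle(dtype: str, *args: Any) -> dict:
        funcs = mapping.get(dtype, [])
        return compose(funcs)(*args)[-1]  # the summary is the last tuple element


    print(handle("Numeric", {}, [1.0, 2.0, 3.0], {}))  # {'n': 3, 'mean': 2.0}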
