Skip to content

Commit dbfcf11

Browse files
Pkcha
authored and committed
feat: initial release
1 parent 7775fd0 commit dbfcf11

8 files changed

Lines changed: 373 additions & 435 deletions

File tree

src/ydata_profiling/config.py

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -407,15 +407,8 @@ class SparkSettings(Settings):
407407
samples.random = 0
408408

409409

410-
class _Config:
411-
"""Container for configuration presets and shorthand mappings.
412-
413-
This class provides predefined configuration groups (sensitive, explorative, themes)
414-
and shorthand mappings for common configuration options. It should be used only
415-
through its static methods.
416-
"""
417-
418-
arg_groups = {
410+
class Config:
411+
arg_groups: Dict[str, Any] = {
419412
"sensitive": {
420413
"samples": None,
421414
"duplicates": None,
@@ -482,43 +475,22 @@ class _Config:
482475

483476
@staticmethod
484477
def get_arg_groups(key: str) -> dict:
485-
"""Get expanded configuration for a preset group.
486-
487-
Args:
488-
key: Name of preset group (e.g., "sensitive", "explorative")
489-
490-
Returns:
491-
Expanded configuration dictionary with shorthands resolved
492-
"""
493-
kwargs = _Config.arg_groups[key]
494-
shorthand_args, _ = _Config.shorthands(kwargs, split=False)
478+
kwargs = Config.arg_groups[key]
479+
shorthand_args, _ = Config.shorthands(kwargs, split=False)
495480
return shorthand_args
496481

497482
@staticmethod
498483
def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
499-
"""Expand shorthand configuration keys.
500-
501-
Args:
502-
kwargs: Configuration dictionary potentially containing shorthands
503-
split: If True, remove shorthands from kwargs and return separately.
504-
If False, expand shorthands in-place within kwargs.
505-
506-
Returns:
507-
Tuple of (shorthand_args, remaining_kwargs)
508-
"""
509484
shorthand_args = {}
510485
if not split:
511486
shorthand_args = kwargs
512487
for key, value in list(kwargs.items()):
513-
if value is None and key in _Config._shorthands:
514-
shorthand_args[key] = _Config._shorthands[key]
488+
if value is None and key in Config._shorthands:
489+
shorthand_args[key] = Config._shorthands[key]
515490
if split:
516491
del kwargs[key]
517492

518493
if split:
519494
return shorthand_args, kwargs
520495
else:
521496
return shorthand_args, {}
522-
523-
524-
Config = _Config

src/ydata_profiling/model/alerts.py

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,10 @@
99

1010
from ydata_profiling.config import Settings
1111
from ydata_profiling.model.correlations import perform_check_correlation
12+
from ydata_profiling.utils.formatters import fmt_percent
1213
from ydata_profiling.utils.styles import get_alert_styles
1314

1415

15-
def fmt_percent(value: float, edge_cases: bool = True) -> str:
16-
"""Format a ratio as a percentage.
17-
18-
Args:
19-
edge_cases: Check for edge cases?
20-
value: The ratio.
21-
22-
Returns:
23-
The percentage with 1 point precision.
24-
"""
25-
if edge_cases and round(value, 3) == 0 and value > 0:
26-
return "< 0.1%"
27-
if edge_cases and round(value, 3) == 1 and value < 1:
28-
return "> 99.9%"
29-
30-
return f"{value*100:2.1f}%"
31-
32-
3316
@unique
3417
class AlertType(Enum):
3518
"""Alert types"""

src/ydata_profiling/model/describe.py

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -27,37 +27,6 @@
2727
from ydata_profiling.version import __version__
2828

2929

30-
def _validate_inputs(
31-
config: Settings, df: Union[pd.DataFrame, "pyspark.sql.DataFrame"] # type: ignore[name-defined] # noqa: F821
32-
) -> None:
33-
"""Validate input types for profiling.
34-
35-
Args:
36-
config: Report configuration settings
37-
df: DataFrame to profile
38-
39-
Raises:
40-
TypeError: If inputs are of incorrect type
41-
"""
42-
if not isinstance(config, Settings):
43-
raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
44-
45-
if isinstance(df, pd.DataFrame):
46-
return
47-
48-
try:
49-
from pyspark.sql import DataFrame as SparkDataFrame
50-
if isinstance(df, SparkDataFrame):
51-
return
52-
except ImportError:
53-
pass
54-
55-
raise TypeError(
56-
f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
57-
f"If using Spark, make sure PySpark is installed."
58-
)
59-
60-
6130
def describe(
6231
config: Settings,
6332
df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore[name-defined] # noqa: F821
@@ -83,7 +52,26 @@ def describe(
8352
- alerts: direct special attention to these patterns in your data.
8453
- package: package details.
8554
"""
86-
_validate_inputs(config, df)
55+
# ** Validate Input types **
56+
if not isinstance(config, Settings):
57+
raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")
58+
59+
# Validate df input type
60+
61+
if not isinstance(df, pd.DataFrame):
62+
try:
63+
from pyspark.sql import DataFrame as SparkDataFrame # type: ignore
64+
65+
if not isinstance(df, SparkDataFrame): # noqa: TC301
66+
raise TypeError( # noqa: TC301
67+
f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
68+
)
69+
except ImportError as ex:
70+
raise TypeError(
71+
f"`df must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got {type(df)}."
72+
f"If using Spark, make sure PySpark is installed."
73+
) from ex
74+
8775
df = preprocess(config, df)
8876

8977
number_of_tasks = 5

src/ydata_profiling/model/handler.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ def composed_function(*args) -> List[Any]:
2525

2626

2727
class Handler:
28-
"""Generic handler for data type specific processing pipelines.
28+
"""A generic handler
2929
30-
Builds a processing pipeline for each data type by composing functions
31-
along the type hierarchy. Allows custom summarization strategies.
30+
Allows any custom mapping between data types and functions
3231
"""
3332

3433
def __init__(
@@ -43,11 +42,6 @@ def __init__(
4342
self._complete_dag()
4443

4544
def _complete_dag(self) -> None:
46-
"""Propagate functions along the type hierarchy DAG.
47-
48-
Functions defined for parent types are inherited by subtypes,
49-
creating a complete processing pipeline for each type.
50-
"""
5145
for from_type, to_type in nx.topological_sort(
5246
nx.line_graph(self.typeset.base_graph)
5347
):
@@ -56,15 +50,9 @@ def _complete_dag(self) -> None:
5650
)
5751

5852
def handle(self, dtype: str, *args, **kwargs) -> dict:
59-
"""Execute the processing pipeline for a given data type.
60-
61-
Args:
62-
dtype: Name of the data type to process
63-
*args: Arguments passed to the processing pipeline
64-
**kwargs: Additional keyword arguments
65-
53+
"""
6654
Returns:
67-
Extracted summary dictionary
55+
object: a tuple containing the config, the dataset series and the summary extracted
6856
"""
6957
funcs = self.mapping.get(dtype, [])
7058
op = compose(funcs)

0 commit comments

Comments
 (0)