Skip to content

Commit 307270e

Browse files
PkchaPkcha
authored and committed
feat: initial release
1 parent 754677b commit 307270e

12 files changed

Lines changed: 57 additions & 67 deletions

File tree

src/ydata_profiling/model/handler.py

Lines changed: 34 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,79 +1,80 @@
11
"""
22
Auxiliary handler methods for data summary extraction
33
"""
4-
from typing import Any, Callable, Dict, List, Sequence, Tuple, TypeVar, cast
4+
from typing import Any, Callable, Dict, List, Sequence, Tuple, Union
55

66
import networkx as nx
77
from visions import VisionsTypeset
88

9-
T = TypeVar("T")
10-
SummaryFunction = Callable[..., Tuple[Any, ...]]
119

12-
13-
def compose(functions: Sequence[SummaryFunction]) -> SummaryFunction:
10+
def compose(functions: Sequence[Callable]) -> Callable:
1411
"""
1512
Compose a sequence of functions.
1613
17-
:param functions: sequence of functions
18-
:return: combined function applying all functions in order.
14+
Each function in the sequence receives the result of the previous function.
15+
Functions are expected to accept and return tuples for proper chaining.
16+
17+
:param functions: sequence of functions that accept and return tuples
18+
:return: combined function applying all functions in order
1919
"""
2020

2121
def composed_function(*args: Any) -> Tuple[Any, ...]:
22-
result: Tuple[Any, ...] = args
22+
result: Union[Tuple[Any, ...], Any] = args
2323
for func in functions:
24-
step_result = func(*result)
25-
if not isinstance(step_result, tuple):
26-
result = (step_result,)
24+
if isinstance(result, tuple):
25+
result = func(*result)
2726
else:
28-
result = step_result
29-
return result
27+
result = func(result)
28+
if isinstance(result, tuple):
29+
return result
30+
return (result,)
3031

3132
return composed_function
3233

3334

3435
class Handler:
3536
"""A generic handler
3637
37-
Allows any custom mapping between data types and functions
38+
Allows any custom mapping between data types and functions.
39+
Functions are composed based on the type hierarchy defined in the typeset.
3840
"""
3941

4042
def __init__(
4143
self,
42-
mapping: Dict[str, List[SummaryFunction]],
44+
mapping: Dict[str, List[Callable]],
4345
typeset: VisionsTypeset,
4446
*args: Any,
45-
**kwargs: Any,
46-
) -> None:
47-
self.mapping: Dict[str, List[SummaryFunction]] = mapping
47+
**kwargs: Any
48+
):
49+
self.mapping = mapping
4850
self.typeset = typeset
4951
self._complete_dag()
5052

5153
def _complete_dag(self) -> None:
5254
for from_type, to_type in nx.topological_sort(
5355
nx.line_graph(self.typeset.base_graph)
5456
):
55-
from_type_str = str(from_type)
56-
to_type_str = str(to_type)
57-
58-
if from_type_str not in self.mapping:
59-
continue
60-
61-
if to_type_str in self.mapping:
62-
self.mapping[to_type_str] = (
63-
self.mapping[from_type_str] + self.mapping[to_type_str]
64-
)
65-
else:
66-
self.mapping[to_type_str] = self.mapping[from_type_str].copy()
57+
from_key = str(from_type)
58+
to_key = str(to_type)
59+
self.mapping[to_key] = self.mapping.get(from_key, []) + self.mapping.get(
60+
to_key, []
61+
)
6762

6863
def handle(self, dtype: str, *args: Any, **kwargs: Any) -> Dict[str, Any]:
6964
"""
70-
Returns:
71-
object: a tuple containing the config, the dataset series and the summary extracted
65+
Execute the handler chain for the given data type.
66+
67+
:param dtype: the data type to handle
68+
:param args: arguments to pass to the handler functions
69+
:param kwargs: keyword arguments (currently unused but reserved for extensibility)
70+
:return: a dictionary containing the summary extracted from the data
7271
"""
7372
funcs = self.mapping.get(dtype, [])
7473
op = compose(funcs)
7574
result = op(*args)
76-
return cast(Dict[str, Any], result[-1])
75+
if result:
76+
return result[-1] if isinstance(result[-1], dict) else {}
77+
return {}
7778

7879

7980
def get_render_map() -> Dict[str, Callable]:

src/ydata_profiling/model/pandas/describe_categorical_pandas.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ def get_character_counts_vc(vc: pd.Series) -> pd.Series:
2727
if len(counts) > 0:
2828
counts = counts.groupby(level=0, sort=False).sum()
2929
counts = counts.sort_values(ascending=False)
30+
# FIXME: correct in split, below should be zero: print(counts.loc[''])
3031
counts = counts[counts.index.str.len() > 0]
3132
return counts
3233

src/ydata_profiling/model/spark/missing_spark.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,11 +82,9 @@ def missing_matrix(config: Settings, df: DataFrame) -> str:
8282
def missing_heatmap(config: Settings, df: DataFrame) -> str:
8383
df = MissingnoBarSparkPatch(df, columns=df.columns, original_df_size=df.count())
8484

85-
# Remove completely filled or completely empty variables.
8685
columns = [i for i, n in enumerate(np.var(df.isnull(), axis="rows")) if n > 0]
8786
df = df.iloc[:, columns]
8887

89-
# Create and mask the correlation matrix. Construct the base heatmap.
9088
corr_mat = df.isnull().corr()
9189
mask = np.zeros_like(corr_mat)
9290
mask[np.triu_indices_from(mask)] = True

src/ydata_profiling/report/presentation/core/collapse.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77

88
class Collapse(ItemRenderer):
9-
def __init__(self, button: ToggleButton, item: Renderable, **kwargs: Any):
9+
def __init__(self, button: ToggleButton, item: Renderable, **kwargs):
1010
super().__init__("collapse", {"button": button, "item": item}, **kwargs)
1111

1212
def __repr__(self) -> str:

src/ydata_profiling/report/presentation/core/container.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ def __init__(
1313
anchor_id: Optional[str] = None,
1414
classes: Optional[str] = None,
1515
oss: Optional[bool] = None,
16-
**kwargs: Any,
16+
**kwargs,
1717
):
1818
args = {"items": items, "nested": nested}
1919
args.update(**kwargs)

src/ydata_profiling/report/presentation/core/dropdown.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def __init__(
1515
anchor_id: str,
1616
classes: list,
1717
is_row: bool,
18-
**kwargs: Any,
18+
**kwargs
1919
):
2020
super().__init__(
2121
"dropdown",

src/ydata_profiling/report/presentation/core/renderable.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from typing import Any, Callable, Dict, Optional
2+
from typing import Any, Dict, Optional
33

44

55
class Renderable(ABC):
@@ -38,5 +38,5 @@ def __str__(self) -> str:
3838
return self.__class__.__name__
3939

4040
@classmethod
41-
def convert_to_class(cls, obj: "Renderable", flavour_func: Callable) -> None:
41+
def convert_to_class(cls, obj: "Renderable", flavour_func) -> None:
4242
obj.__class__ = cls

src/ydata_profiling/report/presentation/core/root.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ class Root(ItemRenderer):
1111
"""
1212

1313
def __init__(
14-
self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs: Any
14+
self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs
1515
):
1616
super().__init__(
1717
"report",
@@ -23,7 +23,7 @@ def __init__(
2323
def __repr__(self) -> str:
2424
return "Root"
2525

26-
def render(self, **kwargs: Any) -> Any:
26+
def render(self, **kwargs) -> Any:
2727
raise NotImplementedError()
2828

2929
@classmethod

src/ydata_profiling/report/presentation/core/variable.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@ def __init__(
1010
top: Renderable,
1111
bottom: Optional[Renderable] = None,
1212
ignore: bool = False,
13-
**kwargs: Any,
13+
**kwargs,
1414
):
1515
super().__init__(
1616
"variable", {"top": top, "bottom": bottom, "ignore": ignore}, **kwargs
1717
)
1818

19-
def __str__(self) -> str:
19+
def __str__(self):
2020
top_text = str(self.content["top"]).replace("\n", "\n\t")
2121
bottom_text = str(self.content["bottom"]).replace("\n", "\n\t")
2222

@@ -25,7 +25,7 @@ def __str__(self) -> str:
2525
text += f"- bottom: {bottom_text}"
2626
return text
2727

28-
def __repr__(self) -> str:
28+
def __repr__(self):
2929
return "Variable"
3030

3131
def render(self) -> Any:

src/ydata_profiling/report/presentation/flavours/flavour_html.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,7 @@
4141
HTMLVariableInfo,
4242
)
4343

44-
from typing import cast
45-
from ydata_profiling.report.presentation.flavours.flavours import _FlavourMapping
46-
47-
html_mapping = cast(_FlavourMapping, {
44+
html_mapping = {
4845
Container: HTMLContainer,
4946
Variable: HTMLVariable,
5047
VariableInfo: HTMLVariableInfo,
@@ -62,6 +59,6 @@
6259
Collapse: HTMLCollapse,
6360
CorrelationTable: HTMLCorrelationTable,
6461
Scores: HTMLScores,
65-
})
62+
}
6663

6764
register_flavour("html", html_mapping)

0 commit comments

Comments
 (0)