Skip to content

Commit 307cba9

Browse files
PkchaPkcha
authored and committed
feat: initial release
1 parent 8d8f6b7 commit 307cba9

6 files changed

Lines changed: 90 additions & 95 deletions

File tree

Lines changed: 63 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,63 @@
1-
"""
2-
Auxiliary handler methods for data summary extraction
3-
"""
4-
from typing import Any, Callable, Dict, List, Sequence
5-
6-
import networkx as nx
7-
from visions import VisionsTypeset
8-
9-
10-
def compose(functions: Sequence[Callable]) -> Callable:
11-
"""
12-
Compose a sequence of functions.
13-
14-
:param functions: sequence of functions
15-
:return: combined function applying all functions in order.
16-
"""
17-
18-
def composed_function(*args) -> List[Any]:
19-
result = args # Start with the input arguments
20-
for func in functions:
21-
result = func(*result) if isinstance(result, tuple) else func(result)
22-
return result # type: ignore
23-
24-
return composed_function # type: ignore
25-
26-
27-
class Handler:
28-
"""A generic handler
29-
30-
Allows any custom mapping between data types and functions
31-
"""
32-
33-
def __init__(
34-
self,
35-
mapping: Dict[str, List[Callable]],
36-
typeset: VisionsTypeset,
37-
*args,
38-
**kwargs
39-
):
40-
self.mapping = mapping
41-
self.typeset = typeset
42-
self._complete_dag()
43-
44-
def _complete_dag(self) -> None:
45-
for from_type, to_type in nx.topological_sort(
46-
nx.line_graph(self.typeset.base_graph)
47-
):
48-
self.mapping[str(to_type)] = (
49-
self.mapping[str(from_type)] + self.mapping[str(to_type)]
50-
)
51-
52-
def handle(self, dtype: str, *args, **kwargs) -> dict:
53-
"""
54-
Returns:
55-
object: a tuple containing the config, the dataset series and the summary extracted
56-
"""
57-
funcs = self.mapping.get(dtype, [])
58-
op = compose(funcs)
59-
summary = op(*args)[-1]
60-
return summary
1+
"""
2+
Auxiliary handler methods for data summary extraction
3+
"""
4+
from typing import Any, Callable, Dict, List, Sequence
5+
6+
import networkx as nx
7+
from visions import VisionsTypeset
8+
9+
10+
def compose(functions: Sequence[Callable]) -> Callable:
    """
    Combine a sequence of callables into one pipeline callable.

    Each callable receives the output of the previous one; whenever an
    intermediate result is a tuple, it is unpacked into positional
    arguments for the next callable.

    :param functions: callables applied left to right
    :return: a single callable running the whole pipeline
    """

    def pipeline(*args) -> List[Any]:
        current: Any = args  # seed the chain with the raw call arguments
        for step in functions:
            if isinstance(current, tuple):
                current = step(*current)
            else:
                current = step(current)
        return current  # type: ignore

    return pipeline  # type: ignore
25+
26+
27+
class Handler:
    """Dispatches summary-extraction pipelines keyed by data-type name.

    Holds a user-supplied mapping from type names to lists of callables
    and, via the typeset's type graph, extends each type's list with the
    callables of its ancestor types so that subtypes inherit behaviour.
    """

    def __init__(
        self,
        mapping: Dict[str, List[Callable]],
        typeset: VisionsTypeset,
        *args,
        **kwargs
    ):
        # mapping: type name -> ordered list of summary callables
        self.mapping = mapping
        # typeset: provides the type relation graph used to complete the DAG
        self.typeset = typeset
        self._complete_dag()

    def _complete_dag(self) -> None:
        # Visit the type graph's edges in topological order and prepend
        # each parent's callables to its child's list, so every type
        # accumulates the functions of all of its ancestors.
        edge_order = nx.topological_sort(nx.line_graph(self.typeset.base_graph))
        for parent, child in edge_order:
            inherited = self.mapping[str(parent)]
            self.mapping[str(child)] = inherited + self.mapping[str(child)]

    def handle(self, dtype: str, *args, **kwargs) -> dict:
        """
        Run the pipeline registered for ``dtype`` on the given arguments.

        :param dtype: name of the data type to handle
        :return: the summary produced by the final pipeline stage
        """
        pipeline = compose(self.mapping.get(dtype, []))
        return pipeline(*args)[-1]
61+
62+
63+

src/ydata_profiling/model/summarizer.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from ydata_profiling.model.pandas.describe_supported_pandas import (
2828
pandas_describe_supported,
2929
)
30-
from ydata_profiling.model.summary_algorithms import ( # Check what is this method used for
30+
from ydata_profiling.model.summary_algorithms import (
3131
describe_file_1d,
3232
describe_image_1d,
3333
describe_path_1d,
@@ -50,9 +50,8 @@ def summarize(
5050
return self.handle(str(dtype), config, series, {"type": str(dtype)})
5151

5252

53-
# Revisit this with the correct support for Spark as well.
5453
class ProfilingSummarizer(BaseSummarizer):
55-
"""A summarizer for Pandas DataFrames."""
54+
"""A summarizer supporting both Pandas and Spark DataFrames."""
5655

5756
def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
5857
self.use_spark = use_spark and is_pyspark_installed()
Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1 @@
11
"""Data structure for the report"""
2-
from typing import Callable, Dict
3-
4-
5-
def get_render_map() -> Dict[str, Callable]:
6-
"""Get the mapping of variable types to their render functions.
7-
8-
This function was moved from model.handler to report.structure to eliminate
9-
the reverse dependency from model layer to report layer.
10-
11-
Returns:
12-
Dictionary mapping type names to render functions.
13-
"""
14-
import ydata_profiling.report.structure.variables as render_algorithms
15-
16-
render_map = {
17-
"Boolean": render_algorithms.render_boolean,
18-
"Numeric": render_algorithms.render_real,
19-
"Complex": render_algorithms.render_complex,
20-
"Text": render_algorithms.render_text,
21-
"DateTime": render_algorithms.render_date,
22-
"Categorical": render_algorithms.render_categorical,
23-
"URL": render_algorithms.render_url,
24-
"Path": render_algorithms.render_path,
25-
"File": render_algorithms.render_file,
26-
"Image": render_algorithms.render_image,
27-
"Unsupported": render_algorithms.render_generic,
28-
"TimeSeries": render_algorithms.render_timeseries,
29-
}
30-
31-
return render_map

src/ydata_profiling/report/structure/report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from ydata_profiling.config import Settings
88
from ydata_profiling.model import BaseDescription
99
from ydata_profiling.model.alerts import AlertType
10-
from ydata_profiling.report.structure import get_render_map
10+
from ydata_profiling.report.structure.variables import get_render_map
1111
from ydata_profiling.report.presentation.core import (
1212
HTML,
1313
Collapse,

src/ydata_profiling/report/structure/variables/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Callable, Dict
2+
13
from ydata_profiling.report.structure.variables.render_boolean import render_boolean
24
from ydata_profiling.report.structure.variables.render_categorical import (
35
render_categorical,
@@ -17,6 +19,26 @@
1719
)
1820
from ydata_profiling.report.structure.variables.render_url import render_url
1921

22+
23+
def get_render_map() -> Dict[str, Callable]:
24+
render_map = {
25+
"Boolean": render_boolean,
26+
"Numeric": render_real,
27+
"Complex": render_complex,
28+
"Text": render_text,
29+
"DateTime": render_date,
30+
"Categorical": render_categorical,
31+
"URL": render_url,
32+
"Path": render_path,
33+
"File": render_file,
34+
"Image": render_image,
35+
"Unsupported": render_generic,
36+
"TimeSeries": render_timeseries,
37+
}
38+
39+
return render_map
40+
41+
2042
__all__ = [
2143
"render_boolean",
2244
"render_categorical",
@@ -32,4 +54,5 @@
3254
"render_text",
3355
"render_timeseries",
3456
"render_url",
57+
"get_render_map",
3558
]

src/ydata_profiling/utils/backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Backend detection utilities for pandas and spark.
2+
File with a function to check the backend being used
33
"""
44
import importlib
55

0 commit comments

Comments
 (0)