2727from ydata_profiling .version import __version__
2828
2929
def _validate_inputs(
    config: Settings,
    df: Union[pd.DataFrame, "pyspark.sql.DataFrame"],  # type: ignore[name-defined] # noqa: F821
) -> None:
    """Validate the input types for profiling.

    Args:
        config: Report configuration settings.
        df: DataFrame to profile; either a pandas or a PySpark DataFrame.

    Raises:
        TypeError: If `config` is not a `Settings` instance, or if `df` is
            neither a `pandas.DataFrame` nor a `pyspark.sql.DataFrame`.
    """
    if not isinstance(config, Settings):
        raise TypeError(f"`config` must be of type `Settings`, got {type(config)}")

    if isinstance(df, pd.DataFrame):
        return

    # PySpark is an optional dependency, so it is imported lazily and only
    # when `df` is not a pandas DataFrame. The `try` body is kept to the
    # import alone so that nothing else can be mistaken for an import failure.
    try:
        from pyspark.sql import DataFrame as SparkDataFrame  # type: ignore
    except ImportError:
        # Only suggest installing PySpark when it is actually missing.
        # The leading space keeps the hint separated from the preceding sentence.
        spark_hint = " If using Spark, make sure PySpark is installed."
    else:
        if isinstance(df, SparkDataFrame):
            return
        spark_hint = ""

    raise TypeError(
        "`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, "
        f"but got {type(df)}.{spark_hint}"
    )
59-
60-
6130def describe (
6231 config : Settings ,
6332 df : Union [pd .DataFrame , "pyspark.sql.DataFrame" ], # type: ignore[name-defined] # noqa: F821
@@ -83,7 +52,26 @@ def describe(
8352 - alerts: direct special attention to these patterns in your data.
8453 - package: package details.
8554 """
86- _validate_inputs (config , df )
55+ # ** Validate Input types **
56+ if not isinstance (config , Settings ):
57+ raise TypeError (f"`config` must be of type `Settings`, got { type (config )} " )
58+
59+ # Validate df input type
60+
61+ if not isinstance (df , pd .DataFrame ):
62+ try :
63+ from pyspark .sql import DataFrame as SparkDataFrame # type: ignore
64+
65+ if not isinstance (df , SparkDataFrame ): # noqa: TC301
66+ raise TypeError ( # noqa: TC301
67+ f"`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got { type (df )} ."
68+ )
69+ except ImportError as ex :
70+ raise TypeError (
71+ f"`df must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got { type (df )} ."
72+ f"If using Spark, make sure PySpark is installed."
73+ ) from ex
74+
8775 df = preprocess (config , df )
8876
8977 number_of_tasks = 5
0 commit comments