-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Expand file tree
/
Copy pathbackend.py
More file actions
41 lines (31 loc) · 1.35 KB
/
backend.py
File metadata and controls
41 lines (31 loc) · 1.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
File with backend utilities and helper functions to check the backend being used
"""
import importlib
from typing import Callable, Optional, Sized, Union
import pandas as pd
def is_pyspark_installed() -> bool:
    """Check if PySpark is installed without importing it.

    Returns:
        True when a module spec for the top-level ``pyspark`` package can be
        located, False otherwise.
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule is bound on the package; import it explicitly so the lookup
    # cannot fail with ``AttributeError: module 'importlib' has no attribute
    # 'util'`` depending on what else happened to be imported first.
    import importlib.util

    return importlib.util.find_spec("pyspark") is not None
class BaseBackend:
    """Pick the Pandas or Spark implementation module once and keep it for later lookups.

    Subclasses are expected to set ``_pandas_module`` and ``_spark_module`` to
    importable module paths; both default to ``None`` here.
    """

    # Dotted import path used when the input is a ``pandas.DataFrame``.
    _pandas_module: Optional[str] = None
    # Dotted import path used for every other input.
    _spark_module: Optional[str] = None

    def __init__(self, df: Union[pd.DataFrame, Sized]):
        """Resolve the backend module for ``df`` and store it on the instance.

        Args:
            df: The data to dispatch on. A ``pandas.DataFrame`` selects
                ``_pandas_module``; anything else selects ``_spark_module``.

        Raises:
            ValueError: If the selected module path is ``None``.
        """
        uses_pandas = isinstance(df, pd.DataFrame)
        target = self._pandas_module if uses_pandas else self._spark_module

        if target is None:
            raise ValueError("Backend module path not configured")

        # Import once here so later lookups only need ``getattr``.
        self.module = importlib.import_module(target)
        self.module_path = target

    def get_method(self, method_name: str) -> Callable:
        """Look up ``method_name`` on the selected backend module.

        Args:
            method_name: Name of the attribute to fetch from the module.

        Returns:
            The attribute found on the backend module.

        Raises:
            AttributeError: If the module has no attribute with that name;
                the original lookup error is chained as the cause.
        """
        try:
            found = getattr(self.module, method_name)
        except AttributeError as err:
            raise AttributeError(
                f"Function '{method_name}' is not available in {self.module_path}."
            ) from err
        return found