-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhelpful_funcs.py
More file actions
48 lines (40 loc) · 1.49 KB
/
helpful_funcs.py
File metadata and controls
48 lines (40 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import pandas as pd
from json import load as json_file_stream_to_dict
from json import JSONDecodeError
from pathlib import Path
def read_json_file_to_dict(file_path:Path) -> dict: # deprecated since moving to mongodb
    """
    Reads a json file and returns a dictionary

    ## Parameters
    file_path: Path
        Path to the json file (a plain string path is accepted as well)

    ## Returns
    dict
        Dictionary with the contents of the json file

    ## Raises
    FileNotFoundError
        If the file does not exist
    JSONDecodeError
        If the file content cannot be parsed as json
    """
    # Coerce so callers passing a plain string still get the documented errors
    # instead of an AttributeError on `.exists()`.
    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"File {file_path} does not exist.")
    with open(file_path, "r") as json_file:
        try:
            return json_file_stream_to_dict(json_file)
        except JSONDecodeError as err:
            # JSONDecodeError requires (msg, doc, pos); constructing it with only
            # a message raises TypeError and hides the real parsing problem.
            # Reuse the original document/position and chain the cause.
            raise JSONDecodeError(
                f"File {file_path} is not a valid json file.", err.doc, err.pos
            ) from err
def downcast_all_numerical_cols_in_df(df:pd.DataFrame) -> pd.DataFrame:
    """
    Shrinks the float and int columns of a dataframe to the smallest dtype
    that `pd.to_numeric` can downcast them to.
    The columns are reassigned on the dataframe that was passed in, and that
    same dataframe object is returned.

    ## Parameters
    df: pd.DataFrame
        Dataframe whose numerical columns should be downcast

    ## Returns
    pd.DataFrame
        The input dataframe, with its numerical columns downcast
    """
    # (select_dtypes selector, to_numeric downcast target); floats are handled
    # before ints, one selection pass per kind.
    downcast_plan = (("float", "float"), ("int", "integer"))
    for dtype_selector, downcast_target in downcast_plan:
        matching_columns = df.select_dtypes(include=[dtype_selector]).columns
        for column_name in matching_columns:
            shrunk = pd.to_numeric(df[column_name], downcast=downcast_target)
            df[column_name] = shrunk
    return df