graphPaname/app/preprocessing.py

25 lines
567 B
Python

from json import load
from pandas import DataFrame, json_normalize
from constants import COLUMNS, FILES
def open_json(dataset) -> dict:
    """
    Loads the parsed content of the JSON file registered for a dataset.

    The file path is looked up as ``FILES[dataset]``.

    Args:
        dataset: Key used to index ``FILES`` for the path of the JSON file.

    Returns:
        The object decoded from the file by ``json.load``. The annotation
        says ``dict``; that holds when the top-level JSON value is an
        object, which this function does not verify.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; pin the encoding so decoding does
    # not depend on the platform's locale default (e.g. cp1252 on Windows).
    with open(FILES[dataset], encoding="utf-8") as f:
        content = load(f)
    return content
def create_dataframe(dataset) -> DataFrame:
    """
    Builds a DataFrame from the JSON file registered for a dataset.

    The content returned by ``open_json(dataset)`` is flattened with
    ``json_normalize`` using the entries under its ``"records"`` key as rows,
    and the result is then restricted to the columns listed in
    ``COLUMNS[dataset]``.

    Args:
        dataset: Key passed to ``open_json`` and used to index ``COLUMNS``.

    Returns:
        A DataFrame holding only those columns named in ``COLUMNS[dataset]``
        that are present in the normalized data.
    """
    normalized = json_normalize(
        data=open_json(dataset),
        record_path=["records"],
        errors="ignore",
    )
    # DataFrame.filter(items=...) keeps only matching labels and silently
    # skips requested names that are absent from the frame.
    return normalized.filter(items=COLUMNS[dataset])