graphPaname/app/preprocessing.py

28 lines
591 B
Python
Raw Normal View History

2020-05-27 20:13:45 +02:00
from json import load
2020-05-27 20:45:21 +02:00
from pandas import json_normalize, DataFrame
2020-06-05 13:48:47 +02:00
from constants import FILES, COLUMNS
2020-05-27 20:13:45 +02:00
2020-06-05 13:48:47 +02:00
def open_json(dataset) -> dict:
    """
    Load the parsed content of the JSON file registered for ``dataset``.

    ``dataset`` is used as a key into ``FILES``; the value found there is
    handed to ``open`` as the file to read.

    The file is decoded explicitly as UTF-8 (the encoding JSON interchange
    text is expected to use) instead of the platform's locale default, so
    non-ASCII content loads the same way on every OS.

    Returns whatever ``json.load`` produces for the file's top-level value.
    """
    # NOTE(review): an unknown ``dataset`` surfaces as the lookup error raised
    # by ``FILES`` (presumably a KeyError if it is a plain dict) - confirm.
    with open(FILES[dataset], encoding="utf-8") as f:
        return load(f)
2020-06-05 13:48:47 +02:00
def create_dataframe(dataset) -> DataFrame:
    """
    Build a flattened DataFrame from the JSON file registered for ``dataset``.

    The file content is read through ``open_json`` and passed to
    ``json_normalize`` with ``"records"`` as the record path and
    ``COLUMNS[dataset]`` as the ``meta`` fields. ``errors="ignore"`` is
    forwarded as well, so ``meta`` keys that are absent do not raise.

    The resulting frame is printed to stdout before being returned.
    """
    # Read the file first so errors surface in the same order as before.
    payload = open_json(dataset)

    normalize_options = {
        "record_path": ["records"],
        "meta": COLUMNS[dataset],
        "errors": "ignore",
    }
    frame = json_normalize(payload, **normalize_options)

    print(frame)
    return frame
2020-06-05 13:48:47 +02:00
# Runs whenever this module is executed or imported: builds (and prints) the
# DataFrame for the "cycling-paths" dataset.
create_dataframe(dataset="cycling-paths")