diff --git a/app/data_request.py b/app/data_request.py index 8d5069c..bd29f3b 100644 --- a/app/data_request.py +++ b/app/data_request.py @@ -13,14 +13,6 @@ def format_url(dataset) -> str: return link -def save_json(data, dataset): - """ - Dumps the data into a JSON file - """ - with open(FILES[dataset], "w") as f: - dump(data, f, ensure_ascii=False) - - def request_dataset(dataset): """ Fetches the requested dataset from opendata's API @@ -30,4 +22,4 @@ def request_dataset(dataset): response = get(url) response.raise_for_status() data = response.json() - save_json(data=data, dataset=dataset) + return data diff --git a/app/preprocessing.py b/app/preprocessing.py index 4fc633b..d506ef6 100644 --- a/app/preprocessing.py +++ b/app/preprocessing.py @@ -6,21 +6,11 @@ from constants import COLUMNS, FILES from app.data_request import request_dataset -def open_json(dataset) -> dict: - """ - Loads a dictionary with data from a JSON file - """ - with open(FILES[dataset]) as f: - json = load(f) - return json - - def create_dataframe(dataset) -> DataFrame: """ Creates a DataFrame from a JSON file """ - request_dataset(dataset) - json = open_json(dataset) + json = request_dataset(dataset) df = json_normalize(data=json, record_path=["records"], errors="ignore",) filtered_df = df.filter(items=COLUMNS[dataset]) return filtered_df diff --git a/data/.keep b/data/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/preprocessing_test.py b/tests/preprocessing_test.py index 15c0f2b..21c11ae 100644 --- a/tests/preprocessing_test.py +++ b/tests/preprocessing_test.py @@ -1,5 +1,3 @@ -from os import remove - from pandas import DataFrame from requests import get @@ -22,8 +20,6 @@ def test_dataframe_creation(): Verifes that the DataFrames are created and filtered properly """ for dataset in DATASETS: - request_dataset(dataset) df = create_dataframe(dataset) - remove(FILES[dataset]) assert isinstance(df, DataFrame) assert all(df.columns == COLUMNS[dataset])