From 9a27a520b45a121f12eb170917b5a769621d64f1 Mon Sep 17 00:00:00 2001
From: coolneng
Date: Wed, 27 May 2020 21:25:08 +0200
Subject: [PATCH] Add dataframes filtering

---
Review notes (this area is ignored by `git am`; not part of the commit message):
* app/preprocessing.py switches its import from `app.constants` to
  `constants`; this presumably expects app/ itself to be on sys.path --
  confirm against how the module is launched.
* create_dataframe now passes record_path=["fields"] to json_normalize; this
  assumes each record's "fields" value is a list (or null) -- TODO confirm
  against the data files listed in `files`.
* meta=columns[dataset] is combined with errors="ignore", so a listed key
  that is absent from a record should not raise -- verify that the keys in
  `columns` live at the level json_normalize looks them up.
* In the hunks shown, `DataFrame` remains imported although the rewritten
  create_dataframe no longer references it.

 app/constants.py     | 24 ++++++++++++++++++++++++
 app/preprocessing.py | 10 +++++++---
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/app/constants.py b/app/constants.py
index 6d3324e..dfdda1f 100644
--- a/app/constants.py
+++ b/app/constants.py
@@ -14,3 +14,27 @@ files = {
     "relay-parking": "data/relay-parking.json",
     "home-delivery": "data/home-delivery.json",
 }
+columns = {
+    "cycling-paths": ["geo_shape", "statut", "record_timestamp", "complement"],
+    "relay-parking": [
+        "societe",
+        "nb_places_dispositif_environ",
+        "parcs",
+        "geo_shape",
+        "cp",
+        "ville",
+        "adresse",
+    ],
+    "home-delivery": [
+        "geo_shape",
+        "adresse",
+        "code_postal",
+        "nom_du_commerce",
+        "type_du_commerce",
+        "site_internet",
+        "record_timestamp",
+        "precisions",
+        "telephone",
+        "mail",
+    ],
+}
diff --git a/app/preprocessing.py b/app/preprocessing.py
index 49bb7da..34169b7 100644
--- a/app/preprocessing.py
+++ b/app/preprocessing.py
@@ -1,6 +1,6 @@
 from json import load
 from pandas import json_normalize, DataFrame
-from app.constants import files
+from constants import files, columns
 
 
 def open_json(dataset):
@@ -11,6 +11,10 @@ def open_json(dataset):
 
 def create_dataframe(dataset):
     json = open_json(dataset)
-    data = json_normalize(data=json["records"])
-    df = DataFrame.from_dict(data=data)
+    df = json_normalize(
+        data=json["records"],
+        record_path=["fields"],
+        meta=columns[dataset],
+        errors="ignore",
+    )
     return df