From 7909def8e20055b89727553e0cf348196c4d53d4 Mon Sep 17 00:00:00 2001
From: coolneng
Date: Fri, 5 Jun 2020 13:48:47 +0200
Subject: [PATCH] Document all the functions

---
 app/constants.py        | 10 +++++-----
 app/preprocessing.py    | 23 +++++++++++++++--------
 app/request_datasets.py | 38 ++++++++++++++++++++++----------------
 shell.nix               |  1 +
 4 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/app/constants.py b/app/constants.py
index dfdda1f..ae6a9bb 100644
--- a/app/constants.py
+++ b/app/constants.py
@@ -1,20 +1,20 @@
-datasets = [
+DATASETS = [
     "coronavirus-commercants-parisiens-livraison-a-domicile",
     "deconfinement-pistes-cyclables-temporaires",
     "deconfinement-parking-relais-doublement-des-places",
 ]
-url = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
-filenames = {
+URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
+FILENAMES = {
     "coronavirus-commercants-parisiens-livraison-a-domicile": "home-delivery",
     "deconfinement-pistes-cyclables-temporaires": "cycling-paths",
     "deconfinement-parking-relais-doublement-des-places": "relay-parking",
 }
-files = {
+FILES = {
     "cycling-paths": "data/cycling-paths.json",
     "relay-parking": "data/relay-parking.json",
     "home-delivery": "data/home-delivery.json",
 }
-columns = {
+COLUMNS = {
     "cycling-paths": ["geo_shape", "statut", "record_timestamp", "complement"],
     "relay-parking": [
         "societe",
diff --git a/app/preprocessing.py b/app/preprocessing.py
index 34169b7..bd451dc 100644
--- a/app/preprocessing.py
+++ b/app/preprocessing.py
@@ -1,20 +1,27 @@
 from json import load
 from pandas import json_normalize, DataFrame
-from constants import files, columns
+from constants import FILES, COLUMNS
 
 
-def open_json(dataset):
-    with open(files[dataset]) as f:
+def open_json(dataset) -> dict:
+    """
+    Loads a dictionary with data from a JSON file
+    """
+    with open(FILES[dataset]) as f:
         json = load(f)
     return json
 
 
-def create_dataframe(dataset):
+def create_dataframe(dataset) -> DataFrame:
+    """
+    Creates a DataFrame from a JSON file
+    """
     json = open_json(dataset)
     df = json_normalize(
-        data=json["records"],
-        record_path=["fields"],
-        meta=columns[dataset],
-        errors="ignore",
+        data=json, record_path=["records"], meta=COLUMNS[dataset], errors="ignore",
     )
+    print(df)
     return df
+
+
+create_dataframe("cycling-paths")
diff --git a/app/request_datasets.py b/app/request_datasets.py
index 0445acc..8b35600 100644
--- a/app/request_datasets.py
+++ b/app/request_datasets.py
@@ -1,28 +1,34 @@
-from constants import datasets, url
+from json import dump
 from requests import get
-from constants import datasets, url, filenames
-from requests import get, codes
-from json import dump, dumps
+from constants import FILENAMES, URL
 
 
-def save_json(data, filename):
+def format_url(dataset) -> str:
+    """
+    Constructs the API's URL for the requested dataset
+    """
+    link = URL.format(dataset)
+    return link
+
+
+def save_json(data, dataset):
+    """
+    Dumps the data into a JSON file
+    """
     data_dir = "data/"
-    with open(data_dir + filename + ".json", "w") as f:
+    with open(data_dir + FILENAMES[dataset] + ".json", "w") as f:
         dump(data, f, ensure_ascii=False)
 
 
-def request_dataset(url, filename):
+def request_dataset(dataset):
+    """
+    Fetches the requested dataset from opendata's API
+    """
+    url = format_url(dataset)
     response = get(url)
     response.raise_for_status()
     data = response.json()
-    save_json(data, filename)
+    save_json(data=data, dataset=dataset)
 
 
-def format_url():
-    for set in datasets:
-        link = url.format(set)
-        request_dataset(link, filenames[set])
-
-
-if __name__ == "__main__":
-    format_url()
+request_dataset("deconfinement-pistes-cyclables-temporaires")
diff --git a/shell.nix b/shell.nix
index c32bc53..589b92a 100644
--- a/shell.nix
+++ b/shell.nix
@@ -17,5 +17,6 @@ pkgs.mkShell {
     python-language-server
     pyls-black
     pyls-isort
+    pyls-mypy
   ];
 }