From cbc1c08350f3046971f26d68d67f4f18971d77b7 Mon Sep 17 00:00:00 2001 From: coolneng Date: Mon, 6 Jan 2020 06:34:35 +0100 Subject: [PATCH] Add csv parser --- code/database/db_setup.py | 6 +++--- code/database/{parse_csv.py => parser.py} | 19 +++++++++++++++---- code/database/storage.py | 0 docs/Project.org | 2 +- 4 files changed, 19 insertions(+), 8 deletions(-) rename code/database/{parse_csv.py => parser.py} (59%) create mode 100644 code/database/storage.py diff --git a/code/database/db_setup.py b/code/database/db_setup.py index c50c8f6..6da3d48 100644 --- a/code/database/db_setup.py +++ b/code/database/db_setup.py @@ -30,8 +30,8 @@ db = create_connection() class Glacier(db.Model): uid = db.Column(db.Integer, primary_key=True) - country = db.Column(db.String(30)) - name = db.Column(db.String(30)) + country = db.Column(db.String(60)) + name = db.Column(db.String(60)) def __init__(self, uid, country, name): self.uid = uid @@ -73,7 +73,7 @@ class Annual_Change(db.Model): self.year = year -class Users(db.Model): +class User(db.Model): uid = db.Column(db.Integer, primary_key=True) registration_date = db.Column(db.DateTime, nullable=False) diff --git a/code/database/parse_csv.py b/code/database/parser.py similarity index 59% rename from code/database/parse_csv.py rename to code/database/parser.py index ecbb551..1b85a71 100644 --- a/code/database/parse_csv.py +++ b/code/database/parser.py @@ -1,5 +1,6 @@ -from pandas import read_csv, concat, DataFrame from iso3166 import countries as co +from pandas import DataFrame, concat, read_csv +from csv import QUOTE_NONNUMERIC def country_conversion(political_unit) -> str: @@ -15,16 +16,26 @@ def select_columns() -> DataFrame: "YEAR", "AREA_SURVEY_YEAR", "AREA_CHANGE", - "THICKNESS CHANGE", + "THICKNESS_CHG", "VOLUME_CHANGE", ] iter_csv = read_csv( - "../../data/WGMS-FoG-2019-12-D-CHANGE.csv", + "../data/WGMS-FoG-2019-12-D-CHANGE.csv", skipinitialspace=True, usecols=fields, iterator=True, chunksize=100, - converters={"YEAR": country_conversion}, + converters={"POLITICAL_UNIT": country_conversion}, ) data = concat([chunk[chunk["YEAR"] > min_year] for chunk in iter_csv]) return data + + +def main(): + df = select_columns() + database = "../data/igdb.csv" + df.to_csv(database, index=False, quoting=QUOTE_NONNUMERIC) + + +if __name__ == "__main__": + main() diff --git a/code/database/storage.py b/code/database/storage.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/Project.org b/docs/Project.org index 6fb7220..764ff73 100644 --- a/docs/Project.org +++ b/docs/Project.org @@ -18,7 +18,7 @@ datos relevantes para estudios acerca del cambio climático, y acotando éstos a *** Datos 1. *RD1*: Datos del glaciar - - País - /Cadena de 30 caracteres máximo/ + - País - /Cadena de 60 caracteres máximo/ - Nombre del glaciar - /Cadena de 60 caracteres máximo/ - ID del glaciar (Compatible con la WGMS) - /Entero de 5 dígitos/