From c2d6c2363b85d794f4cfe0955922f83d44b897e2 Mon Sep 17 00:00:00 2001 From: coolneng Date: Sun, 5 Jan 2020 01:00:06 +0100 Subject: [PATCH] Rename code folder and add filters to parser --- Code/database/db_setup.py | 12 ++--- Code/database/parse_csv.py | 22 +++++++-- Design.org | 6 +-- {Code => code}/Pipfile | 0 {Code => code}/Pipfile.lock | 0 .../fields.py => code/database/columns.py | 0 code/database/db_setup.py | 49 +++++++++++++++++++ {Code => code}/database/mariadb_setup.sh | 0 code/database/parse_csv.py | 16 ++++++ docs/Project.org | 2 +- 10 files changed, 93 insertions(+), 14 deletions(-) rename {Code => code}/Pipfile (100%) rename {Code => code}/Pipfile.lock (100%) rename Code/database/fields.py => code/database/columns.py (100%) create mode 100644 code/database/db_setup.py rename {Code => code}/database/mariadb_setup.sh (100%) create mode 100644 code/database/parse_csv.py diff --git a/Code/database/db_setup.py b/Code/database/db_setup.py index 33accea..00eaf77 100644 --- a/Code/database/db_setup.py +++ b/Code/database/db_setup.py @@ -1,14 +1,16 @@ -from subprocess import call +from subprocess import run from sys import argv from flask_sqlalchemy import SQLAlchemy from flask import Flask from fields import Glacier, Annual_Data, Annual_Change, User +global db + def create_database(db_name, db_user, db_pw): script = "mariadb_setup.sh" - output = call([script + db_name + db_user + db_pw]) - if output != 0: + output = run([script + db_name + db_user + db_pw]) + if output.returncode != 0: print("Error: couldn't create database") exit() @@ -31,9 +33,7 @@ def create_tables(db): def main(): if len(argv) != 3: - print( - "Usage: " + argv[0] + " " - ) + print("Usage: " + argv[0] + " ") exit() db_name = str(argv[1]) diff --git a/Code/database/parse_csv.py b/Code/database/parse_csv.py index 869059b..ecbb551 100644 --- a/Code/database/parse_csv.py +++ b/Code/database/parse_csv.py @@ -1,16 +1,30 @@ from pandas import read_csv, concat, DataFrame -from iso3166 import countries +from iso3166 import countries as co -def select_columns() -> [DataFrame]: +def country_conversion(political_unit) -> str: + codes = co.get(political_unit) + return codes.name + + +def select_columns() -> DataFrame: min_year = 2010 - fields = ["POLITICAL_UNIT", "WGMS_ID", "YEAR"] + fields = [ + "POLITICAL_UNIT", + "WGMS_ID", + "YEAR", + "AREA_SURVEY_YEAR", + "AREA_CHANGE", + "THICKNESS CHANGE", + "VOLUME_CHANGE", + ] iter_csv = read_csv( - "../../Assets/WGMS-FoG-2019-12-D-CHANGE.csv", + "../../data/WGMS-FoG-2019-12-D-CHANGE.csv", skipinitialspace=True, usecols=fields, iterator=True, chunksize=100, + converters={"YEAR": country_conversion}, ) data = concat([chunk[chunk["YEAR"] > min_year] for chunk in iter_csv]) return data diff --git a/Design.org b/Design.org index 5afbf75..14918bf 100644 --- a/Design.org +++ b/Design.org @@ -51,9 +51,9 @@ CLOSED: [2020-01-03 Fri 00:44] - [X] Connection - [X] Creation from script - [X] Creation of tables via class -**** TODO Parser [0/4] [0%] -- [ ] Select useful fiels -- [ ] Convert PU to Country (ISO 3166) +**** TODO Parser [2/4] [50%] +- [X] Select useful fiels +- [X] Convert PU to Country (ISO 3166) - [ ] Arithmetic operations for yearly changes - [ ] Insert into database **** NEXT Flask framework diff --git a/Code/Pipfile b/code/Pipfile similarity index 100% rename from Code/Pipfile rename to code/Pipfile diff --git a/Code/Pipfile.lock b/code/Pipfile.lock similarity index 100% rename from Code/Pipfile.lock rename to code/Pipfile.lock diff --git a/Code/database/fields.py b/code/database/columns.py similarity index 100% rename from Code/database/fields.py rename to code/database/columns.py diff --git a/code/database/db_setup.py b/code/database/db_setup.py new file mode 100644 index 0000000..85e712d --- /dev/null +++ b/code/database/db_setup.py @@ -0,0 +1,49 @@ +from subprocess import call +from sys import argv +from flask_sqlalchemy import SQLAlchemy +from flask import Flask +from fields import Glacier, Annual_Data, Annual_Change, User + +global db + + +def create_database(db_name, db_user, db_pw): + script = "mariadb_setup.sh" + output = call([script + db_name + db_user + db_pw]) + if output != 0: + print("Error: couldn't create database") + exit() + + +def create_connection(db_name, db_user, db_pw): + host = "localhost:3306" + connection_uri = "mysql+pymysql://{user}:{pw}@{url}/{db}".format( + user=db_user, pw=db_pw, url=host, db=db_name + ) + app = Flask(__name__) + app.config["SQLALCHEMY_DATABASE_URI"] = connection_uri + app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False + global db + db = SQLAlchemy(app) + + +def create_tables(db): + db.create_all() + + +def main(): + if len(argv) != 3: + print("Usage: " + argv[0] + " ") + exit() + + db_name = str(argv[1]) + db_user = str(argv[2]) + db_password = str(argv[3]) + + create_database(db_name, db_user, db_password) + create_connection(db_name, db_user, db_password) + create_tables(db) + + +if __name__ == "__main__": + main() diff --git a/Code/database/mariadb_setup.sh b/code/database/mariadb_setup.sh similarity index 100% rename from Code/database/mariadb_setup.sh rename to code/database/mariadb_setup.sh diff --git a/code/database/parse_csv.py b/code/database/parse_csv.py new file mode 100644 index 0000000..869059b --- /dev/null +++ b/code/database/parse_csv.py @@ -0,0 +1,16 @@ +from pandas import read_csv, concat, DataFrame +from iso3166 import countries + + +def select_columns() -> [DataFrame]: + min_year = 2010 + fields = ["POLITICAL_UNIT", "WGMS_ID", "YEAR"] + iter_csv = read_csv( + "../../Assets/WGMS-FoG-2019-12-D-CHANGE.csv", + skipinitialspace=True, + usecols=fields, + iterator=True, + chunksize=100, + ) + data = concat([chunk[chunk["YEAR"] > min_year] for chunk in iter_csv]) + return data diff --git a/docs/Project.org b/docs/Project.org index 7c4b435..6fb7220 100644 --- a/docs/Project.org +++ b/docs/Project.org @@ -19,7 +19,7 @@ datos relevantes para estudios acerca del cambio climático, y acotando éstos a 1. *RD1*: Datos del glaciar - País - /Cadena de 30 caracteres máximo/ - - Nombre del glaciar - /Cadena de 30 caracteres máximo/ + - Nombre del glaciar - /Cadena de 60 caracteres máximo/ - ID del glaciar (Compatible con la WGMS) - /Entero de 5 dígitos/ 2. *RD2*: Datos anuales de un glaciar