2020-01-05 02:51:14 +01:00
|
|
|
from iso3166 import countries as co
|
2020-01-06 06:34:35 +01:00
|
|
|
from pandas import DataFrame, concat, read_csv
|
|
|
|
from csv import QUOTE_NONNUMERIC
|
2020-01-08 21:31:00 +01:00
|
|
|
from database.constants import ADMIN_PW
|
2020-01-08 22:14:16 +01:00
|
|
|
from os import path
|
2020-01-09 03:17:31 +01:00
|
|
|
from werkzeug.security import generate_password_hash
|
2020-01-05 01:00:06 +01:00
|
|
|
|
|
|
|
|
2020-01-05 02:51:14 +01:00
|
|
|
def country_conversion(political_unit) -> str:
    """Translate a ``POLITICAL_UNIT`` code into a country name.

    The literal code ``"99"`` is handed back unchanged. Every other value
    is looked up through ``iso3166.countries.get`` and the ``name``
    attribute of the matching entry is returned.

    NOTE(review): "99" is presumably the dataset's placeholder for an
    unspecified country -- confirm against the WGMS data description.
    """
    # Anything other than the pass-through code goes through the lookup.
    if political_unit != "99":
        return co.get(political_unit).name
    return "99"
|
|
|
|
|
|
|
|
|
|
|
|
def select_columns(
    min_year: int = 2010,
    csv_path: str = "../data/WGMS-FoG-2019-12-B-STATE.csv",
) -> DataFrame:
    """Load the WGMS state CSV and keep only rows newer than ``min_year``.

    The file is read in chunks of 100 rows; each chunk is filtered before
    the surviving rows are concatenated, so the unfiltered file is never
    held in memory as a single frame.

    Args:
        min_year: Exclusive lower bound for the ``YEAR`` column; only rows
            with ``YEAR > min_year`` are kept. Defaults to 2010.
        csv_path: Location of the CSV file to read. A relative path is
            resolved against the current working directory.

    Returns:
        A DataFrame restricted to the columns listed in ``fields``, with
        ``POLITICAL_UNIT`` already passed through ``country_conversion``.
    """
    fields = [
        "POLITICAL_UNIT",
        "NAME",
        "WGMS_ID",
        "YEAR",
        "MEDIAN_ELEVATION",
        "AREA",
        "LENGTH",
    ]
    iter_csv = read_csv(
        csv_path,
        skipinitialspace=True,
        usecols=fields,
        iterator=True,
        chunksize=100,
        # Convert the country code while parsing, one cell at a time.
        converters={"POLITICAL_UNIT": country_conversion},
    )
    # Filter chunk by chunk, then stitch the remaining rows together.
    data = concat([chunk[chunk["YEAR"] > min_year] for chunk in iter_csv])
    return data
|
2020-01-06 06:34:35 +01:00
|
|
|
|
|
|
|
|
2020-01-08 01:23:00 +01:00
|
|
|
def rename_fields(df_list):
    """Return a new mapping whose frames use the database column names.

    ``df_list`` is a mapping (despite its name) from a table label to a
    DataFrame. Each frame is passed through ``DataFrame.rename`` with the
    column mapping below; columns not mentioned in the mapping are left
    as they are. The input mapping itself is not modified.
    """
    column_map = {
        "POLITICAL_UNIT": "country",
        "NAME": "name",
        "WGMS_ID": "id",
        "YEAR": "year",
        "MEDIAN_ELEVATION": "elevation",
        "AREA": "surface",
        "LENGTH": "length",
    }
    return {
        label: frame.rename(columns=column_map)
        for label, frame in df_list.items()
    }
|
|
|
|
|
|
|
|
|
2020-01-06 08:31:21 +01:00
|
|
|
def create_databases(df):
    """Write the glacier, annual-data and user tables to CSV files.

    Three frames are built: the distinct glacier rows taken from ``df``,
    the per-year measurement columns of ``df``, and a one-row user table
    holding the admin account with a hash of ``ADMIN_PW``. All three are
    passed through ``rename_fields`` and then written under ``../data``.
    A target file that already exists is left untouched.
    """
    admin_account = DataFrame(
        {
            "id": [7843],
            "username": ["admin"],
            "password_hash": [generate_password_hash(ADMIN_PW)],
        }
    )

    glacier_cols = ["POLITICAL_UNIT", "NAME", "WGMS_ID"]
    annual_cols = ["WGMS_ID", "YEAR", "AREA", "MEDIAN_ELEVATION", "LENGTH"]
    tables = rename_fields(
        {
            "glacier": df[glacier_cols].drop_duplicates(),
            "annual_data": df[annual_cols],
            "user": admin_account,
        }
    )

    targets = {
        "glacier": "../data/glacier.csv",
        "annual_data": "../data/annual_data.csv",
        "user": "../data/user.csv",
    }
    for table_name, frame in tables.items():
        target = targets[table_name]
        # Only create files that are missing; existing ones are kept.
        if not path.isfile(target):
            frame.to_csv(target, index=False, quoting=QUOTE_NONNUMERIC)
|
|
|
|
|
|
|
|
|
2020-01-06 06:34:35 +01:00
|
|
|
def main():
    """Load the filtered source data and write the CSV tables from it."""
    create_databases(select_columns())
|
2020-01-06 06:34:35 +01:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the conversion only when this file is executed as a script.
    main()
|