from csv import QUOTE_NONNUMERIC
from typing import Dict

from iso3166 import countries as co
from pandas import DataFrame, concat, read_csv

from database.constants import ADMIN_PW


def country_conversion(political_unit) -> str:
    """Convert a ``POLITICAL_UNIT`` cell into a country name.

    Used as a ``read_csv`` converter, so it is called once per cell of the
    ``POLITICAL_UNIT`` column.

    Args:
        political_unit: Raw cell value. The placeholder ``"99"`` is passed
            through unchanged; any other value is looked up with
            ``iso3166.countries.get``.

    Returns:
        ``"99"`` for the placeholder, otherwise the ``name`` attribute of
        the matching iso3166 record.

    Note:
        No fallback is supplied to the lookup, so an unrecognised code
        propagates whatever error ``iso3166`` raises (presumably
        ``KeyError`` -- confirm against the installed version).
    """
    if political_unit == "99":
        return "99"
    return co.get(political_unit).name


def select_columns(
    path: str = "../data/WGMS-FoG-2019-12-B-STATE.csv",
    min_year: int = 2010,
) -> DataFrame:
    """Load the state CSV, keeping selected columns and recent rows only.

    The file is read in chunks of 100 rows and each chunk is filtered
    before concatenation, so rows that fail the year filter are dropped
    chunk by chunk instead of after the whole file has been loaded.

    Args:
        path: Location of the source CSV. The default is relative to the
            current working directory.
        min_year: Exclusive lower bound on ``YEAR``; only rows with
            ``YEAR > min_year`` are kept (with the default, 2011 onwards).

    Returns:
        A single DataFrame holding the filtered rows, with
        ``POLITICAL_UNIT`` already converted by ``country_conversion``.
    """
    fields = [
        "POLITICAL_UNIT",
        "NAME",
        "WGMS_ID",
        "YEAR",
        "MEDIAN_ELEVATION",
        "AREA",
        "LENGTH",
    ]
    iter_csv = read_csv(
        path,
        skipinitialspace=True,
        usecols=fields,
        iterator=True,
        chunksize=100,
        converters={"POLITICAL_UNIT": country_conversion},
    )
    # Strictly greater-than: rows whose YEAR equals ``min_year`` are dropped.
    return concat([chunk[chunk["YEAR"] > min_year] for chunk in iter_csv])


def rename_fields(df_list: Dict[str, DataFrame]) -> Dict[str, DataFrame]:
    """Rename the source column names to the output column names.

    Args:
        df_list: Mapping of table name to DataFrame (a dict, despite the
            parameter name). Columns missing from a frame are ignored by
            ``DataFrame.rename``.

    Returns:
        A new dict with the same keys whose values are renamed copies; the
        input frames are not modified.
    """
    new_fields = {
        "POLITICAL_UNIT": "country",
        "NAME": "name",
        "WGMS_ID": "uid",
        "YEAR": "year",
        "MEDIAN_ELEVATION": "elevation",
        "AREA": "surface",
        "LENGTH": "length",
    }
    return {
        key: frame.rename(columns=new_fields) for key, frame in df_list.items()
    }


def create_databases(df: DataFrame) -> None:
    """Split ``df`` into per-table frames and write each one to CSV.

    Three files are written under ``../data/`` (relative to the current
    working directory): ``glacier.csv``, ``annual_data.csv`` and
    ``user.csv``. All are written without the index and with non-numeric
    fields quoted.

    Args:
        df: Frame containing at least the columns ``POLITICAL_UNIT``,
            ``NAME``, ``WGMS_ID``, ``YEAR``, ``AREA``, ``MEDIAN_ELEVATION``
            and ``LENGTH``.
    """
    # NOTE(review): ADMIN_PW is written to user.csv exactly as imported.
    # If that constant is a plaintext password it ends up on disk in the
    # clear -- confirm it is already hashed.
    user = {"uid": [7843], "username": ["admin"], "password": [ADMIN_PW]}
    files = {
        "glacier": "../data/glacier.csv",
        "annual_data": "../data/annual_data.csv",
        "user": "../data/user.csv",
    }
    dataframes = {
        # One row per distinct (country, name, id) combination.
        "glacier": df[["POLITICAL_UNIT", "NAME", "WGMS_ID"]].drop_duplicates(),
        "annual_data": df[
            ["WGMS_ID", "YEAR", "AREA", "MEDIAN_ELEVATION", "LENGTH"]
        ],
        "user": DataFrame(user),
    }
    for key, frame in rename_fields(dataframes).items():
        frame.to_csv(files[key], index=False, quoting=QUOTE_NONNUMERIC)


def main():
    """Read the source CSV and write the derived CSV tables."""
    df = select_columns()
    create_databases(df)


if __name__ == "__main__":
    main()