igdb/code/database/parser.py

42 lines
970 B
Python

from iso3166 import countries as co
from pandas import DataFrame, concat, read_csv
from csv import QUOTE_NONNUMERIC
def country_conversion(political_unit) -> str:
    """Return the country name recorded for *political_unit*.

    The value is handed to the ``get`` lookup of the ``iso3166`` country
    table (imported in this module as ``co``) and the ``name`` attribute of
    the record that comes back is returned.

    No error handling is performed here: whatever the lookup raises for an
    identifier it does not recognise propagates to the caller unchanged.
    """
    return co.get(political_unit).name
def select_columns(
    csv_path: str = "../data/WGMS-FoG-2019-12-D-CHANGE.csv",
    min_year: int = 2010,
    chunksize: int = 100,
) -> DataFrame:
    """Load the selected change-table columns for rows newer than *min_year*.

    The CSV is read chunk by chunk. Only the columns listed in ``fields``
    are kept, every ``POLITICAL_UNIT`` value is passed through
    ``country_conversion``, and each chunk is filtered on ``YEAR`` before
    the surviving rows are concatenated into a single frame.

    Args:
        csv_path: Location of the CSV file to read. The default is a
            relative path, so it is resolved against the current working
            directory of the process.
        min_year: Exclusive lower bound on ``YEAR``: a row is kept only
            when ``YEAR > min_year``, so rows for *min_year* itself are
            dropped.
        chunksize: Number of rows read per chunk.

    Returns:
        A DataFrame holding the filtered rows of all chunks.
    """
    fields = [
        "POLITICAL_UNIT",
        "WGMS_ID",
        "YEAR",
        "AREA_SURVEY_YEAR",
        "AREA_CHANGE",
        "THICKNESS_CHG",
        "VOLUME_CHANGE",
    ]
    reader = read_csv(
        csv_path,
        skipinitialspace=True,
        usecols=fields,
        iterator=True,
        chunksize=chunksize,
        converters={"POLITICAL_UNIT": country_conversion},
    )
    # Filter every chunk before concatenating so that rejected rows are
    # discarded as soon as their chunk has been processed.
    return concat([chunk[chunk["YEAR"] > min_year] for chunk in reader])
def main():
    """Build the filtered table and write it to ``../data/igdb.csv``.

    The frame returned by ``select_columns`` is written without its index,
    using ``QUOTE_NONNUMERIC`` as the CSV quoting mode.
    """
    selected = select_columns()
    output_path = "../data/igdb.csv"
    selected.to_csv(output_path, quoting=QUOTE_NONNUMERIC, index=False)


# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    main()