2020-01-04 17:44:51 +01:00
|
|
|
from pandas import read_csv, concat, DataFrame
|
2020-01-05 01:00:06 +01:00
|
|
|
from iso3166 import countries as co
|
2020-01-04 17:44:51 +01:00
|
|
|
|
|
|
|
|
2020-01-05 01:00:06 +01:00
|
|
|
def country_conversion(political_unit) -> str:
    """Return the country name that ``iso3166`` associates with a political unit.

    Args:
        political_unit: Key handed to ``iso3166.countries.get``.

    Returns:
        The ``name`` attribute of the matching ``iso3166`` country record.
    """
    # Look the record up and read its name in one step; lookup failures
    # raised by ``co.get`` are deliberately left to propagate.
    return co.get(political_unit).name
|
|
|
|
|
|
|
|
|
|
|
|
def select_columns(
    min_year: int = 2010,
    csv_path: str = "../../data/WGMS-FoG-2019-12-D-CHANGE.csv",
) -> DataFrame:
    """Load selected columns of the change CSV, keeping rows after ``min_year``.

    The file is read in chunks of 100 rows. Each chunk is filtered on
    ``YEAR > min_year`` before the surviving rows are concatenated, so rows
    that fail the filter are discarded chunk by chunk. Values of the
    ``POLITICAL_UNIT`` column are passed through ``country_conversion``
    while parsing.

    Args:
        min_year: Exclusive lower bound for the ``YEAR`` column.
        csv_path: Location of the CSV file to read.

    Returns:
        A DataFrame holding the selected columns for every row whose
        ``YEAR`` is strictly greater than ``min_year``.
    """
    fields = [
        "POLITICAL_UNIT",
        "WGMS_ID",
        "YEAR",
        "AREA_SURVEY_YEAR",
        "AREA_CHANGE",
        # NOTE(review): this is the only field spelled with a space instead
        # of an underscore. ``read_csv(usecols=...)`` raises ValueError when
        # a requested column is missing -- confirm against the CSV header.
        "THICKNESS CHANGE",
        "VOLUME_CHANGE",
    ]
    iter_csv = read_csv(
        csv_path,
        skipinitialspace=True,
        usecols=fields,
        iterator=True,
        chunksize=100,
        # The country converter belongs on POLITICAL_UNIT. Attaching it to
        # YEAR would feed year values to the country lookup and leave no
        # numeric YEAR column for the comparison below.
        converters={"POLITICAL_UNIT": country_conversion},
    )
    return concat([chunk[chunk["YEAR"] > min_year] for chunk in iter_csv])
|