diff --git a/src/P2/preprocessing.py b/src/P2/preprocessing.py
index 21a83ea..e3871f5 100644
--- a/src/P2/preprocessing.py
+++ b/src/P2/preprocessing.py
@@ -1,4 +1,5 @@
 from pandas import DataFrame, read_csv
+from sklearn.preprocessing import normalize
 
 
 def replace_values(df) -> DataFrame:
@@ -37,7 +38,42 @@ def filter_dataframe(df) -> DataFrame:
     return filtered_df
 
 
+def normalize_numerical_values(df) -> DataFrame:
+    """Return a copy of *df* with the selected numerical columns normalized.
+
+    The columns named in ``cols`` that exist in *df* are passed to
+    ``sklearn.preprocessing.normalize`` and written back into the copy; every
+    other column is left untouched and *df* itself is not modified.
+
+    NOTE(review): with its default arguments ``normalize`` rescales each row
+    to unit L2 norm (it does not scale each column independently) -- confirm
+    that row-wise scaling is what this pipeline needs.
+    """
+    cols = [
+        "TOT_HERIDOS_LEVES",
+        "TOT_HERIDOS_GRAVES",
+        "TOT_VEHICULOS_IMPLICADOS",
+        "TOT_MUERTOS",
+    ]
+    filtered_df = df.filter(items=cols)
+    updated_df = df.copy()
+    if filtered_df.empty:
+        # No rows or none of the columns present: nothing to normalize.
+        return updated_df
+    # normalize() returns a bare ndarray, so assign it back by column name.
+    # DataFrame.update() is not usable here: it works in place and returns
+    # None, and an unlabeled array would not align with the named columns.
+    updated_df[filtered_df.columns] = normalize(X=filtered_df)
+    return updated_df
+
+
 def parse_data(source, action) -> DataFrame:
+    """Load *source* as CSV, handle missing values and normalize the result.
+
+    ``"?"`` entries are read as missing values and handed to ``process_na``
+    together with *action* before the numerical columns are normalized.
+    """
     df = read_csv(filepath_or_buffer=source, na_values="?")
     processed_df = process_na(df=df, action=action)
-    return processed_df
+    normalized_df = normalize_numerical_values(df=processed_df)
+    return normalized_df