def encode_columns(df):
    """Return a copy of *df* with its categorical columns label-encoded.

    The "Shape" and "Severity" columns are each replaced by the output of
    ``LabelEncoder.fit_transform`` on that column. The frame passed in is
    not modified; every other column is carried over from the copy as-is.

    NOTE(review): assumes *df* is a pandas DataFrame containing both
    columns -- confirm against callers.
    """
    encoded_df = df.copy()
    # One fresh encoder per column, so each column gets its own fitted
    # mapping instead of a single shared encoder being re-fitted.
    for column in ("Shape", "Severity"):
        encoded_df[column] = LabelEncoder().fit_transform(df[column])
    return encoded_df


def parse_data(source, action):
    """Read a CSV from *source*, handle missing values, and encode columns.

    Args:
        source: Passed to ``read_csv`` as ``filepath_or_buffer``.
        action: Forwarded unchanged to ``process_na`` (defined elsewhere in
            this module), which decides how missing values are treated.

    Returns:
        The frame returned by ``encode_columns`` for the processed data.
    """
    # Cells containing "?" are read as missing values.
    df = read_csv(filepath_or_buffer=source, na_values="?")
    processed_df = process_na(df=df, action=action)
    return encode_columns(df=processed_df)