Replace SMOTETomek with SMOTEENN

This commit is contained in:
coolneng 2021-01-01 23:37:41 +01:00
parent 29fd2a8436
commit 9a8944cb5d
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
1 changed file with 5 additions and 6 deletions

View File

@@ -1,7 +1,7 @@
from pandas import DataFrame, read_csv from pandas import DataFrame, read_csv
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold from sklearn.model_selection import KFold
from imblearn.combine import SMOTETomek from imblearn.combine import SMOTEENN
def construct_dataframes(train, test): def construct_dataframes(train, test):
@@ -20,10 +20,9 @@ def rename_columns(df_list) -> DataFrame:
def drop_null_values(df_list): def drop_null_values(df_list):
drop_columns = ["tipo_marchas", "descuento", "ciudad"]
for df in df_list: for df in df_list:
df.drop(columns="tipo_marchas", inplace=True) df.drop(columns=drop_columns, inplace=True)
df.drop(columns="descuento", inplace=True)
df.drop(columns="ciudad", inplace=True)
df.dropna(inplace=True) df.dropna(inplace=True)
return df_list return df_list
@@ -63,9 +62,9 @@ def split_data_target(df, dataset):
def balance_training_data(df): def balance_training_data(df):
smote_tomek = SMOTETomek(random_state=42) smote_enn = SMOTEENN(random_state=42)
data, target = split_data_target(df=df, dataset="data") data, target = split_data_target(df=df, dataset="data")
balanced_data, balanced_target = smote_tomek.fit_resample(data, target) balanced_data, balanced_target = smote_enn.fit_resample(data, target)
balanced_data_df = DataFrame( balanced_data_df = DataFrame(
balanced_data, columns=df.columns.difference(["precio_cat"]) balanced_data, columns=df.columns.difference(["precio_cat"])
) )