Replace SMOTETomek with SMOTEENN
This commit is contained in:
parent
29fd2a8436
commit
9a8944cb5d
|
@ -1,7 +1,7 @@
|
||||||
from pandas import DataFrame, read_csv
|
from pandas import DataFrame, read_csv
|
||||||
from sklearn.preprocessing import LabelEncoder
|
from sklearn.preprocessing import LabelEncoder
|
||||||
from sklearn.model_selection import KFold
|
from sklearn.model_selection import KFold
|
||||||
from imblearn.combine import SMOTETomek
|
from imblearn.combine import SMOTEENN
|
||||||
|
|
||||||
|
|
||||||
def construct_dataframes(train, test):
|
def construct_dataframes(train, test):
|
||||||
|
@ -20,10 +20,9 @@ def rename_columns(df_list) -> DataFrame:
|
||||||
|
|
||||||
|
|
||||||
def drop_null_values(df_list):
|
def drop_null_values(df_list):
|
||||||
|
drop_columns = ["tipo_marchas", "descuento", "ciudad"]
|
||||||
for df in df_list:
|
for df in df_list:
|
||||||
df.drop(columns="tipo_marchas", inplace=True)
|
df.drop(columns=drop_columns, inplace=True)
|
||||||
df.drop(columns="descuento", inplace=True)
|
|
||||||
df.drop(columns="ciudad", inplace=True)
|
|
||||||
df.dropna(inplace=True)
|
df.dropna(inplace=True)
|
||||||
return df_list
|
return df_list
|
||||||
|
|
||||||
|
@ -63,9 +62,9 @@ def split_data_target(df, dataset):
|
||||||
|
|
||||||
|
|
||||||
def balance_training_data(df):
|
def balance_training_data(df):
|
||||||
smote_tomek = SMOTETomek(random_state=42)
|
smote_enn = SMOTEENN(random_state=42)
|
||||||
data, target = split_data_target(df=df, dataset="data")
|
data, target = split_data_target(df=df, dataset="data")
|
||||||
balanced_data, balanced_target = smote_tomek.fit_resample(data, target)
|
balanced_data, balanced_target = smote_enn.fit_resample(data, target)
|
||||||
balanced_data_df = DataFrame(
|
balanced_data_df = DataFrame(
|
||||||
balanced_data, columns=df.columns.difference(["precio_cat"])
|
balanced_data, columns=df.columns.difference(["precio_cat"])
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue