Complete missing values in columns with less data
This commit is contained in:
parent
9a8944cb5d
commit
3b08cb73f5
|
@ -19,9 +19,17 @@ def rename_columns(df_list) -> DataFrame:
|
|||
return df_list
|
||||
|
||||
|
||||
def drop_null_values(df_list):
|
||||
def process_null_values(df_list):
    """Fill or drop missing values in every DataFrame of ``df_list``.

    Each DataFrame is modified in place:

    * ``asientos``: missing entries are replaced with the column median.
    * ``motor_cc`` and ``potencia``: missing entries are replaced with the
      mean of the first run of digits found in each non-missing entry.
      Existing entries keep their original text; only the gaps receive the
      numeric mean.
    * ``tipo_marchas``, ``descuento`` and ``ciudad`` are dropped.
    * Any row that still contains a missing value is dropped.

    Args:
        df_list: Iterable of DataFrames that contain all of the columns
            named above.

    Returns:
        The same ``df_list`` object, with its DataFrames processed.
    """
    drop_columns = ["tipo_marchas", "descuento", "ciudad"]
    fill_columns = ["asientos", "motor_cc", "potencia"]
    for df in df_list:
        for column in fill_columns:
            if column == "asientos":
                fill_value = df[column].median()
            else:
                # expand=False yields a Series (not a one-column DataFrame),
                # and the float cast makes mean() a real arithmetic mean
                # instead of an operation on digit strings.
                digits = df[column].str.extract(r"(\d+)", expand=False)
                fill_value = digits.astype(float).mean()
            # Assign back instead of calling fillna(inplace=True) on
            # df[column]: the chained form acts on a temporary Series and is
            # a no-op under pandas Copy-on-Write.
            df[column] = df[column].fillna(fill_value)
        df.drop(columns=drop_columns, inplace=True)
        df.dropna(inplace=True)
    return df_list
|
||||
|
@ -80,7 +88,7 @@ def split_k_sets(df):
|
|||
def parse_data(train, test):
|
||||
df_list = construct_dataframes(train=train, test=test)
|
||||
renamed_df_list = rename_columns(df_list)
|
||||
processed_df_list = drop_null_values(renamed_df_list)
|
||||
processed_df_list = process_null_values(renamed_df_list)
|
||||
encoded_df_list = encode_columns(processed_df_list)
|
||||
train_data, train_target = balance_training_data(encoded_df_list[0])
|
||||
test_data, test_ids = split_data_target(encoded_df_list[1], dataset="test")
|
||||
|
|
Loading…
Reference in New Issue