Complete missing values in columns with less data
This commit is contained in:
parent
9a8944cb5d
commit
3b08cb73f5
|
@ -19,9 +19,17 @@ def rename_columns(df_list) -> DataFrame:
|
||||||
return df_list
|
return df_list
|
||||||
|
|
||||||
|
|
||||||
def _numeric_mean(series):
    """Return the mean of the numeric content of ``series``.

    Numeric series are averaged directly. For text series, the first run of
    digits in each value is extracted and converted to a number; missing
    values and values without digits are ignored. The result is NaN when no
    numeric content is found.
    """
    import pandas as pd  # local import: the module-level imports are not visible here

    if pd.api.types.is_numeric_dtype(series):
        return series.mean()
    # expand=False yields a Series (not a one-column DataFrame), so the mean
    # below is a scalar rather than a Series indexed by the capture group.
    digits = series.str.extract(r"(\d+)", expand=False)
    return pd.to_numeric(digits, errors="coerce").mean()


def process_null_values(df_list):
    """Fill or drop missing values in every DataFrame of ``df_list`` in place.

    For each DataFrame:

    * missing ``asientos`` values are replaced by the column median;
    * missing ``motor_cc`` and ``potencia`` values are replaced by the mean
      of the numeric part of the column's values;
    * the ``tipo_marchas``, ``descuento`` and ``ciudad`` columns are removed;
    * every row that still contains a missing value is dropped.

    Args:
        df_list: iterable of pandas DataFrames containing the columns above.

    Returns:
        The same ``df_list`` object; its DataFrames are modified in place.

    Raises:
        KeyError: if a DataFrame lacks one of the expected columns.
    """
    import pandas as pd  # local import: the module-level imports are not visible here

    drop_columns = ["tipo_marchas", "descuento", "ciudad"]
    fill_columns = ["asientos", "motor_cc", "potencia"]

    for df in df_list:
        for column in fill_columns:
            if column == "asientos":
                fill_value = df[column].median()
            else:
                fill_value = _numeric_mean(df[column])

            # Nothing usable to fill with (e.g. the column is entirely
            # missing): leave the gaps for dropna() below.
            if pd.isna(fill_value):
                continue

            values = df[column]
            if not pd.api.types.is_numeric_dtype(values):
                # A text column must be able to hold the numeric fill value.
                # NOTE(review): existing values keep their original text form
                # while filled cells are numeric -- confirm that the later
                # encoding step handles both.
                values = values.astype(object)

            # Assign back instead of chained ``fillna(inplace=True)``, which
            # acts on a temporary and may leave ``df`` unchanged.
            df[column] = values.fillna(fill_value)

        df.drop(columns=drop_columns, inplace=True)
        df.dropna(inplace=True)

    return df_list
|
||||||
|
@ -80,7 +88,7 @@ def split_k_sets(df):
|
||||||
def parse_data(train, test):
|
def parse_data(train, test):
|
||||||
df_list = construct_dataframes(train=train, test=test)
|
df_list = construct_dataframes(train=train, test=test)
|
||||||
renamed_df_list = rename_columns(df_list)
|
renamed_df_list = rename_columns(df_list)
|
||||||
processed_df_list = drop_null_values(renamed_df_list)
|
processed_df_list = process_null_values(renamed_df_list)
|
||||||
encoded_df_list = encode_columns(processed_df_list)
|
encoded_df_list = encode_columns(processed_df_list)
|
||||||
train_data, train_target = balance_training_data(encoded_df_list[0])
|
train_data, train_target = balance_training_data(encoded_df_list[0])
|
||||||
test_data, test_ids = split_data_target(encoded_df_list[1], dataset="test")
|
test_data, test_ids = split_data_target(encoded_df_list[1], dataset="test")
|
||||||
|
|
Loading…
Reference in New Issue