Subset the dataframe columns after case filtering
This commit is contained in:
parent
e15685d575
commit
8bcc7fa7bc
|
@ -22,30 +22,17 @@ def process_na(df, action):
|
|||
|
||||
def filter_dataframe(df):
|
||||
relevant_columns = [
|
||||
"HORA",
|
||||
"DIASEMANA",
|
||||
"COMUNIDAD_AUTONOMA",
|
||||
"ISLA",
|
||||
"TOT_HERIDOS_LEVES",
|
||||
"TOT_HERIDOS_GRAVES",
|
||||
"TOT_VEHICULOS_IMPLICADOS",
|
||||
"TOT_MUERTOS",
|
||||
"TIPO_VIA",
|
||||
"LUMINOSIDAD",
|
||||
"FACTORES_ATMOSFERICOS",
|
||||
]
|
||||
filtered_df = df.filter(items=relevant_columns)
|
||||
return filtered_df
|
||||
|
||||
|
||||
def normalize_numerical_values(df):
|
||||
cols = [
|
||||
"TOT_HERIDOS_LEVES",
|
||||
"TOT_HERIDOS_GRAVES",
|
||||
"TOT_VEHICULOS_IMPLICADOS",
|
||||
"TOT_MUERTOS",
|
||||
]
|
||||
filtered_df = df.filter(items=cols)
|
||||
filtered_df = filter_dataframe(df=df)
|
||||
normalized_data = normalize(X=filtered_df)
|
||||
normalized_df = DataFrame(data=normalized_data, columns=filtered_df.columns)
|
||||
df.update(normalized_df)
|
||||
|
@ -55,6 +42,5 @@ def normalize_numerical_values(df):
|
|||
def parse_data(source, action):
|
||||
df = read_csv(filepath_or_buffer=source, na_values="?")
|
||||
processed_df = process_na(df=df, action=action)
|
||||
filtered_df = filter_dataframe(df=processed_df)
|
||||
normalized_df = normalize_numerical_values(df=filtered_df)
|
||||
normalized_df = normalize_numerical_values(df=processed_df)
|
||||
return normalized_df
|
||||
|
|
|
@ -7,7 +7,7 @@ from seaborn import heatmap, set_style, set_theme, pairplot
|
|||
from sklearn.metrics import silhouette_score, calinski_harabasz_score
|
||||
from sklearn.cluster import KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN
|
||||
|
||||
from preprocessing import parse_data
|
||||
from preprocessing import parse_data, filter_dataframe
|
||||
|
||||
|
||||
def choose_model(model):
|
||||
|
@ -160,9 +160,10 @@ def main():
|
|||
data = parse_data(source="data/accidentes_2013.csv", action=str(argv[1]))
|
||||
individual_result, complete_results = create_result_dataframes()
|
||||
case_data = construct_case(df=data, choice=case)
|
||||
filtered_data = filter_dataframe(df=case_data)
|
||||
for model in models:
|
||||
model_results = predict_data(
|
||||
data=case_data,
|
||||
data=filtered_data,
|
||||
model=model,
|
||||
results=individual_result,
|
||||
sample=sample,
|
||||
|
|
Loading…
Reference in New Issue