Remove type hints and the rename_model function
This commit is contained in:
parent
a3798a781f
commit
e15685d575
|
@ -29,8 +29,9 @@ def choose_model(model):
|
|||
|
||||
|
||||
def predict_data(data, target, model, results):
|
||||
model = choose_model(model)
|
||||
if model == "knn":
|
||||
model_name = model
|
||||
model = choose_model(model=model)
|
||||
if model_name == "knn":
|
||||
data = scale(data)
|
||||
confusion_matrices, auc, fpr, tpr = [], [], [], []
|
||||
for train_index, test_index in split_k_sets(data):
|
||||
|
@ -43,7 +44,7 @@ def predict_data(data, target, model, results):
|
|||
tpr.append(tpr_item)
|
||||
populated_results = populate_results(
|
||||
df=results,
|
||||
model=model,
|
||||
model=model_name,
|
||||
fpr=mean(fpr, axis=0),
|
||||
tpr=mean(tpr, axis=0),
|
||||
auc=mean(auc),
|
||||
|
@ -122,27 +123,13 @@ def create_result_dataframes():
|
|||
|
||||
|
||||
def populate_results(df, model, fpr, tpr, auc, confusion_matrix):
    """Return a new frame made of ``df`` plus one row of results for ``model``.

    The row stores the five arguments under the columns ``model``, ``fpr``,
    ``tpr``, ``auc`` and ``confusion_matrix``. ``df`` itself is not modified;
    the returned frame is re-indexed from 0.
    """
    # Imported locally so the function does not depend on which pandas names
    # the surrounding module happens to import.
    from pandas import DataFrame, concat

    row = {
        "model": model,
        "fpr": fpr,
        "tpr": tpr,
        "auc": auc,
        "confusion_matrix": confusion_matrix,
    }
    # ``DataFrame.append`` was deprecated in pandas 1.4 and removed in 2.0.
    # Wrapping the dict in a list makes it a single row, so sequence values
    # (fpr, tpr, confusion_matrix) each stay in one cell.
    return concat([df, DataFrame([row])], ignore_index=True)
|
||||
|
||||
|
||||
def rename_model(model):
    """Translate an estimator's printed form into its short name.

    ``model`` must be exactly one of the strings used as keys below; any
    other value raises ``KeyError``.
    """
    short_name_by_repr = {
        "GaussianNB()": "gnb",
        "LinearSVC(random_state=42)": "svc",
        "KNeighborsClassifier(n_neighbors=10)": "knn",
        "DecisionTreeClassifier(random_state=42)": "tree",
        "MLPClassifier(hidden_layer_sizes=10)": "neuralnet",
    }
    return short_name_by_repr[model]
|
||||
|
||||
|
||||
def transform_dataframe(data, target):
|
||||
joined_df = data.join(target)
|
||||
binned_df = joined_df.copy()
|
||||
|
|
|
@ -2,13 +2,13 @@ from pandas import DataFrame, read_csv
|
|||
from sklearn.preprocessing import normalize
|
||||
|
||||
|
||||
def replace_values(df):
    """Fill the missing values of every column with that column's mean.

    ``df`` is modified in place and the same object is returned.
    """
    for column in df.columns:
        # Assign the filled column back rather than calling
        # ``fillna(inplace=True)`` on the ``df[column]`` selection: that
        # chained in-place call acts on an intermediate object and does not
        # update ``df`` when pandas copy-on-write is active.
        df[column] = df[column].fillna(value=df[column].mean())
    return df
|
||||
|
||||
|
||||
def process_na(df, action) -> DataFrame:
|
||||
def process_na(df, action):
|
||||
if action == "drop":
|
||||
return df.dropna()
|
||||
elif action == "fill":
|
||||
|
@ -20,7 +20,7 @@ def process_na(df, action) -> DataFrame:
|
|||
exit()
|
||||
|
||||
|
||||
def filter_dataframe(df) -> DataFrame:
|
||||
def filter_dataframe(df):
|
||||
relevant_columns = [
|
||||
"HORA",
|
||||
"DIASEMANA",
|
||||
|
@ -38,18 +38,7 @@ def filter_dataframe(df) -> DataFrame:
|
|||
return filtered_df
|
||||
|
||||
|
||||
def choose_numerical_values(df):
    """Return the subset of ``df`` restricted to the four ``TOT_*`` columns.

    Selection goes through ``DataFrame.filter(items=...)``.
    """
    return df.filter(
        items=[
            "TOT_HERIDOS_LEVES",
            "TOT_HERIDOS_GRAVES",
            "TOT_VEHICULOS_IMPLICADOS",
            "TOT_MUERTOS",
        ]
    )
|
||||
|
||||
|
||||
def normalize_numerical_values(df) -> DataFrame:
|
||||
def normalize_numerical_values(df):
|
||||
cols = [
|
||||
"TOT_HERIDOS_LEVES",
|
||||
"TOT_HERIDOS_GRAVES",
|
||||
|
@ -63,7 +52,7 @@ def normalize_numerical_values(df) -> DataFrame:
|
|||
return df
|
||||
|
||||
|
||||
def parse_data(source, action) -> DataFrame:
|
||||
def parse_data(source, action):
|
||||
df = read_csv(filepath_or_buffer=source, na_values="?")
|
||||
processed_df = process_na(df=df, action=action)
|
||||
filtered_df = filter_dataframe(df=processed_df)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import time
|
||||
from typing import Union
|
||||
from sys import argv
|
||||
|
||||
from matplotlib.pyplot import *
|
||||
|
@ -8,12 +7,10 @@ from seaborn import heatmap, set_style, set_theme, pairplot
|
|||
from sklearn.metrics import silhouette_score, calinski_harabasz_score
|
||||
from sklearn.cluster import KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN
|
||||
|
||||
from preprocessing import parse_data, choose_numerical_values
|
||||
from preprocessing import parse_data
|
||||
|
||||
|
||||
def choose_model(
|
||||
model,
|
||||
) -> Union[KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN, None]:
|
||||
def choose_model(model):
|
||||
if model == "kmeans":
|
||||
return KMeans(random_state=42)
|
||||
elif model == "birch":
|
||||
|
@ -26,22 +23,22 @@ def choose_model(
|
|||
return DBSCAN()
|
||||
|
||||
|
||||
def predict_data(data, model, results, sample) -> DataFrame:
|
||||
def predict_data(data, model, results, sample):
|
||||
model_name = model
|
||||
model = choose_model(model)
|
||||
start_time = time.time()
|
||||
numerical_data = choose_numerical_values(df=data)
|
||||
prediction = model.fit_predict(numerical_data)
|
||||
prediction = model.fit_predict(data)
|
||||
execution_time = time.time() - start_time
|
||||
calinski = calinski_harabasz_score(X=numerical_data, labels=prediction)
|
||||
calinski = calinski_harabasz_score(X=data, labels=prediction)
|
||||
silhouette = silhouette_score(
|
||||
X=numerical_data,
|
||||
X=data,
|
||||
labels=prediction,
|
||||
metric="euclidean",
|
||||
sample_size=sample,
|
||||
)
|
||||
populated_results = populate_results(
|
||||
df=results,
|
||||
model=model,
|
||||
model=model_name,
|
||||
prediction=prediction,
|
||||
clusters=len(prediction),
|
||||
calinski=calinski,
|
||||
|
@ -110,10 +107,7 @@ def create_result_dataframes():
|
|||
return indexed_results, indexed_results
|
||||
|
||||
|
||||
def populate_results(
|
||||
df, model, clusters, prediction, calinski, silhouette, time
|
||||
) -> DataFrame:
|
||||
renamed_model = rename_model(model=f"{model}")
|
||||
def populate_results(df, model, clusters, prediction, calinski, silhouette, time):
|
||||
columns = [
|
||||
"model",
|
||||
"clusters",
|
||||
|
@ -122,25 +116,12 @@ def populate_results(
|
|||
"calinski-harabasz",
|
||||
"time",
|
||||
]
|
||||
values = [renamed_model, clusters, prediction, silhouette, calinski, time]
|
||||
values = [model, clusters, prediction, silhouette, calinski, time]
|
||||
dictionary = dict(zip(columns, values))
|
||||
populated_df = df.append(dictionary, ignore_index=True)
|
||||
return populated_df
|
||||
|
||||
|
||||
def rename_model(model) -> str:
    """Translate a clustering estimator's printed form into its short name.

    ``model`` must be exactly one of the strings used as keys below; any
    other value raises ``KeyError``.
    """
    short_name_by_repr = {
        "KMeans(random_state=42)": "kmeans",
        "Birch()": "birch",
        "AffinityPropagation(random_state=42)": "affinity",
        "MeanShift()": "meanshift",
        "DBSCAN()": "dbscan",
    }
    return short_name_by_repr[model]
|
||||
|
||||
|
||||
def construct_case(df, choice):
|
||||
cases = {
|
||||
"case1": df.loc[(df["LUMINOSIDAD"].str.contains("NOCHE"))],
|
||||
|
|
Loading…
Reference in New Issue