Remove type hints and the rename_model function
This commit is contained in:
parent
a3798a781f
commit
e15685d575
|
@ -29,8 +29,9 @@ def choose_model(model):
|
||||||
|
|
||||||
|
|
||||||
def predict_data(data, target, model, results):
|
def predict_data(data, target, model, results):
|
||||||
model = choose_model(model)
|
model_name = model
|
||||||
if model == "knn":
|
model = choose_model(model=model)
|
||||||
|
if model_name == "knn":
|
||||||
data = scale(data)
|
data = scale(data)
|
||||||
confusion_matrices, auc, fpr, tpr = [], [], [], []
|
confusion_matrices, auc, fpr, tpr = [], [], [], []
|
||||||
for train_index, test_index in split_k_sets(data):
|
for train_index, test_index in split_k_sets(data):
|
||||||
|
@ -43,7 +44,7 @@ def predict_data(data, target, model, results):
|
||||||
tpr.append(tpr_item)
|
tpr.append(tpr_item)
|
||||||
populated_results = populate_results(
|
populated_results = populate_results(
|
||||||
df=results,
|
df=results,
|
||||||
model=model,
|
model=model_name,
|
||||||
fpr=mean(fpr, axis=0),
|
fpr=mean(fpr, axis=0),
|
||||||
tpr=mean(tpr, axis=0),
|
tpr=mean(tpr, axis=0),
|
||||||
auc=mean(auc),
|
auc=mean(auc),
|
||||||
|
@ -122,27 +123,13 @@ def create_result_dataframes():
|
||||||
|
|
||||||
|
|
||||||
def populate_results(df, model, fpr, tpr, auc, confusion_matrix):
|
def populate_results(df, model, fpr, tpr, auc, confusion_matrix):
|
||||||
renamed_model = rename_model(model=f"{model}")
|
|
||||||
columns = ["model", "fpr", "tpr", "auc", "confusion_matrix"]
|
columns = ["model", "fpr", "tpr", "auc", "confusion_matrix"]
|
||||||
values = [renamed_model, fpr, tpr, auc, confusion_matrix]
|
values = [model, fpr, tpr, auc, confusion_matrix]
|
||||||
dictionary = dict(zip(columns, values))
|
dictionary = dict(zip(columns, values))
|
||||||
populated_df = df.append(dictionary, ignore_index=True)
|
populated_df = df.append(dictionary, ignore_index=True)
|
||||||
return populated_df
|
return populated_df
|
||||||
|
|
||||||
|
|
||||||
def rename_model(model):
|
|
||||||
short_name = ["gnb", "svc", "knn", "tree", "neuralnet"]
|
|
||||||
models = [
|
|
||||||
"GaussianNB()",
|
|
||||||
"LinearSVC(random_state=42)",
|
|
||||||
"KNeighborsClassifier(n_neighbors=10)",
|
|
||||||
"DecisionTreeClassifier(random_state=42)",
|
|
||||||
"MLPClassifier(hidden_layer_sizes=10)",
|
|
||||||
]
|
|
||||||
mapping = dict(zip(models, short_name))
|
|
||||||
return mapping[model]
|
|
||||||
|
|
||||||
|
|
||||||
def transform_dataframe(data, target):
|
def transform_dataframe(data, target):
|
||||||
joined_df = data.join(target)
|
joined_df = data.join(target)
|
||||||
binned_df = joined_df.copy()
|
binned_df = joined_df.copy()
|
||||||
|
|
|
@ -2,13 +2,13 @@ from pandas import DataFrame, read_csv
|
||||||
from sklearn.preprocessing import normalize
|
from sklearn.preprocessing import normalize
|
||||||
|
|
||||||
|
|
||||||
def replace_values(df) -> DataFrame:
|
def replace_values(df):
|
||||||
for column in df.columns:
|
for column in df.columns:
|
||||||
df[column].fillna(value=df[column].mean(), inplace=True)
|
df[column].fillna(value=df[column].mean(), inplace=True)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
def process_na(df, action) -> DataFrame:
|
def process_na(df, action):
|
||||||
if action == "drop":
|
if action == "drop":
|
||||||
return df.dropna()
|
return df.dropna()
|
||||||
elif action == "fill":
|
elif action == "fill":
|
||||||
|
@ -20,7 +20,7 @@ def process_na(df, action) -> DataFrame:
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
|
||||||
def filter_dataframe(df) -> DataFrame:
|
def filter_dataframe(df):
|
||||||
relevant_columns = [
|
relevant_columns = [
|
||||||
"HORA",
|
"HORA",
|
||||||
"DIASEMANA",
|
"DIASEMANA",
|
||||||
|
@ -38,18 +38,7 @@ def filter_dataframe(df) -> DataFrame:
|
||||||
return filtered_df
|
return filtered_df
|
||||||
|
|
||||||
|
|
||||||
def choose_numerical_values(df):
|
def normalize_numerical_values(df):
|
||||||
cols = [
|
|
||||||
"TOT_HERIDOS_LEVES",
|
|
||||||
"TOT_HERIDOS_GRAVES",
|
|
||||||
"TOT_VEHICULOS_IMPLICADOS",
|
|
||||||
"TOT_MUERTOS",
|
|
||||||
]
|
|
||||||
filtered_df = df.filter(items=cols)
|
|
||||||
return filtered_df
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_numerical_values(df) -> DataFrame:
|
|
||||||
cols = [
|
cols = [
|
||||||
"TOT_HERIDOS_LEVES",
|
"TOT_HERIDOS_LEVES",
|
||||||
"TOT_HERIDOS_GRAVES",
|
"TOT_HERIDOS_GRAVES",
|
||||||
|
@ -63,7 +52,7 @@ def normalize_numerical_values(df) -> DataFrame:
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
def parse_data(source, action) -> DataFrame:
|
def parse_data(source, action):
|
||||||
df = read_csv(filepath_or_buffer=source, na_values="?")
|
df = read_csv(filepath_or_buffer=source, na_values="?")
|
||||||
processed_df = process_na(df=df, action=action)
|
processed_df = process_na(df=df, action=action)
|
||||||
filtered_df = filter_dataframe(df=processed_df)
|
filtered_df = filter_dataframe(df=processed_df)
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import time
|
import time
|
||||||
from typing import Union
|
|
||||||
from sys import argv
|
from sys import argv
|
||||||
|
|
||||||
from matplotlib.pyplot import *
|
from matplotlib.pyplot import *
|
||||||
|
@ -8,12 +7,10 @@ from seaborn import heatmap, set_style, set_theme, pairplot
|
||||||
from sklearn.metrics import silhouette_score, calinski_harabasz_score
|
from sklearn.metrics import silhouette_score, calinski_harabasz_score
|
||||||
from sklearn.cluster import KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN
|
from sklearn.cluster import KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN
|
||||||
|
|
||||||
from preprocessing import parse_data, choose_numerical_values
|
from preprocessing import parse_data
|
||||||
|
|
||||||
|
|
||||||
def choose_model(
|
def choose_model(model):
|
||||||
model,
|
|
||||||
) -> Union[KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN, None]:
|
|
||||||
if model == "kmeans":
|
if model == "kmeans":
|
||||||
return KMeans(random_state=42)
|
return KMeans(random_state=42)
|
||||||
elif model == "birch":
|
elif model == "birch":
|
||||||
|
@ -26,22 +23,22 @@ def choose_model(
|
||||||
return DBSCAN()
|
return DBSCAN()
|
||||||
|
|
||||||
|
|
||||||
def predict_data(data, model, results, sample) -> DataFrame:
|
def predict_data(data, model, results, sample):
|
||||||
|
model_name = model
|
||||||
model = choose_model(model)
|
model = choose_model(model)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
numerical_data = choose_numerical_values(df=data)
|
prediction = model.fit_predict(data)
|
||||||
prediction = model.fit_predict(numerical_data)
|
|
||||||
execution_time = time.time() - start_time
|
execution_time = time.time() - start_time
|
||||||
calinski = calinski_harabasz_score(X=numerical_data, labels=prediction)
|
calinski = calinski_harabasz_score(X=data, labels=prediction)
|
||||||
silhouette = silhouette_score(
|
silhouette = silhouette_score(
|
||||||
X=numerical_data,
|
X=data,
|
||||||
labels=prediction,
|
labels=prediction,
|
||||||
metric="euclidean",
|
metric="euclidean",
|
||||||
sample_size=sample,
|
sample_size=sample,
|
||||||
)
|
)
|
||||||
populated_results = populate_results(
|
populated_results = populate_results(
|
||||||
df=results,
|
df=results,
|
||||||
model=model,
|
model=model_name,
|
||||||
prediction=prediction,
|
prediction=prediction,
|
||||||
clusters=len(prediction),
|
clusters=len(prediction),
|
||||||
calinski=calinski,
|
calinski=calinski,
|
||||||
|
@ -110,10 +107,7 @@ def create_result_dataframes():
|
||||||
return indexed_results, indexed_results
|
return indexed_results, indexed_results
|
||||||
|
|
||||||
|
|
||||||
def populate_results(
|
def populate_results(df, model, clusters, prediction, calinski, silhouette, time):
|
||||||
df, model, clusters, prediction, calinski, silhouette, time
|
|
||||||
) -> DataFrame:
|
|
||||||
renamed_model = rename_model(model=f"{model}")
|
|
||||||
columns = [
|
columns = [
|
||||||
"model",
|
"model",
|
||||||
"clusters",
|
"clusters",
|
||||||
|
@ -122,25 +116,12 @@ def populate_results(
|
||||||
"calinski-harabasz",
|
"calinski-harabasz",
|
||||||
"time",
|
"time",
|
||||||
]
|
]
|
||||||
values = [renamed_model, clusters, prediction, silhouette, calinski, time]
|
values = [model, clusters, prediction, silhouette, calinski, time]
|
||||||
dictionary = dict(zip(columns, values))
|
dictionary = dict(zip(columns, values))
|
||||||
populated_df = df.append(dictionary, ignore_index=True)
|
populated_df = df.append(dictionary, ignore_index=True)
|
||||||
return populated_df
|
return populated_df
|
||||||
|
|
||||||
|
|
||||||
def rename_model(model) -> str:
|
|
||||||
short_name = ["kmeans", "birch", "affinity", "meanshift", "dbscan"]
|
|
||||||
models = [
|
|
||||||
"KMeans(random_state=42)",
|
|
||||||
"Birch()",
|
|
||||||
"AffinityPropagation(random_state=42)",
|
|
||||||
"MeanShift()",
|
|
||||||
"DBSCAN()",
|
|
||||||
]
|
|
||||||
mapping = dict(zip(models, short_name))
|
|
||||||
return mapping[model]
|
|
||||||
|
|
||||||
|
|
||||||
def construct_case(df, choice):
|
def construct_case(df, choice):
|
||||||
cases = {
|
cases = {
|
||||||
"case1": df.loc[(df["LUMINOSIDAD"].str.contains("NOCHE"))],
|
"case1": df.loc[(df["LUMINOSIDAD"].str.contains("NOCHE"))],
|
||||||
|
|
Loading…
Reference in New Issue