Plot roc_auc_curve and add plumbing for plotting
This commit is contained in:
parent
3dd13a6fb5
commit
c5a147e5df
Binary file not shown.
After Width: | Height: | Size: 83 KiB |
|
@ -1,12 +1,15 @@
|
|||
from numpy import mean
|
||||
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
|
||||
from sklearn.model_selection import cross_val_score
|
||||
from numpy import mean, arange
|
||||
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve
|
||||
from sklearn.model_selection import cross_val_predict
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.preprocessing import scale
|
||||
from sklearn.svm import LinearSVC
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from seaborn import set_theme
|
||||
from matplotlib.pyplot import *
|
||||
from pandas import DataFrame
|
||||
|
||||
from sys import argv
|
||||
|
||||
|
@ -24,65 +27,141 @@ def choose_model(model):
|
|||
return DecisionTreeClassifier(random_state=42)
|
||||
elif model == "neuralnet":
|
||||
return MLPClassifier(hidden_layer_sizes=10)
|
||||
else:
|
||||
print("Unknown model selected. The choices are: ")
|
||||
print("gnb: Gaussian Naive Bayes")
|
||||
print("svc: Linear Support Vector Classification")
|
||||
print("knn: K-neighbors")
|
||||
print("tree: Decision tree")
|
||||
print("neuralnet: MLP Classifier")
|
||||
exit()
|
||||
|
||||
|
||||
def predict_data(data, target, model, results):
    """Evaluate one classifier fold by fold and record its averaged metrics.

    For every train/test split produced by ``split_k_sets`` the classifier is
    fitted, and the confusion matrix, ROC AUC and ROC curve points are computed
    from its predictions. The per-fold values are averaged and handed to
    ``populate_results``.

    Args:
        data: Feature table; rows are selected with ``.iloc``.
        target: Labels aligned with ``data``; rows are selected with ``.iloc``.
        model: Short model name understood by ``choose_model``.
        results: Frame that the new result row is added to.

    Returns:
        Whatever ``populate_results`` returns for the averaged metrics.
    """
    estimator = choose_model(model)
    # The name must be tested, not the estimator: comparing the estimator
    # object with "knn" is never true, so the scaling step was always skipped.
    if model == "knn":
        # scale() returns a plain array; rebuild the frame so that the
        # ``.iloc`` lookups below keep working.
        data = DataFrame(scale(data), index=data.index, columns=data.columns)
    confusion_matrices, auc, fpr, tpr = [], [], [], []
    for train_index, test_index in split_k_sets(data):
        estimator.fit(data.iloc[train_index], target.iloc[train_index])
        expected = target.iloc[test_index]
        prediction = estimator.predict(data.iloc[test_index])
        confusion_matrices.append(confusion_matrix(expected, prediction))
        auc.append(roc_auc_score(expected, prediction))
        fpr_item, tpr_item, _ = roc_curve(expected, prediction)
        fpr.append(fpr_item)
        tpr.append(tpr_item)
    # NOTE(review): averaging the curve points assumes every fold returned the
    # same number of points from roc_curve - confirm this holds for all folds.
    populated_results = populate_results(
        df=results,
        model=estimator,
        fpr=mean(fpr, axis=0),
        tpr=mean(tpr, axis=0),
        auc=mean(auc),
        confusion_matrix=mean(confusion_matrices, axis=0),
    )
    return populated_results
|
||||
|
||||
|
||||
def plot_roc_auc_curve(model, results):
    """Draw one model's ROC curve on the current figure.

    Args:
        model: Index label of the row in ``results`` to plot.
        results: Frame indexed by model name; its ``fpr``, ``tpr`` and ``auc``
            entries for ``model`` are read.
    """
    row = results.loc[model]
    rounded_auc = round(row["auc"], 3)
    plot(
        row["fpr"],
        row["tpr"],
        label=f"{model} , AUC={rounded_auc}",
    )
    # arange() excludes its stop value, so arange(0.0, 1.0, step=0.1) left
    # the 1.0 tick out. Building the ticks from integers gives 0.0 .. 1.0
    # inclusive without depending on floating-point rounding of the stop.
    ticks = arange(11) / 10
    xticks(ticks)
    yticks(ticks)
    legend(loc="lower right")
|
||||
|
||||
|
||||
def plot_confusion_matrix(model, results):
    """Write one model's confusion matrix values onto the current figure.

    Each cell is written at its own (column, row) position; previously every
    row was written at the same point (0.5, 0.5), so the text overlapped.

    Args:
        model: Index label of the row in ``results`` to plot.
        results: Frame indexed by model name; its ``confusion_matrix`` entry
            for ``model`` is read and iterated row by row.
    """
    matrix = results.loc[model]["confusion_matrix"]
    classes = ["Negative", "Positive"]
    for row_index, row in enumerate(matrix):
        for column_index, value in enumerate(row):
            text(
                x=column_index,
                y=row_index,
                s=f"{value:.1f}",
                ha="center",
                va="center",
            )
    positions = arange(len(classes))
    xticks(ticks=positions, labels=classes)
    yticks(ticks=positions, labels=classes)
    # Leave half a cell of margin around the tick positions, and flip the
    # y axis so that the first matrix row is drawn at the top.
    xlim(-0.5, len(classes) - 0.5)
    ylim(len(classes) - 0.5, -0.5)
    # NOTE(review): plot_individual_figure calls this once per model on the
    # same figure, so values from different models still share the same
    # cells - confirm whether one figure per model is wanted.
|
||||
|
||||
|
||||
def choose_plot_type(type, model, results):
    """Dispatch to the plotting routine that matches ``type``.

    Args:
        type: Plot kind, either ``"roc"`` or ``"confusion_matrix"``.
        model: Index label of the row in ``results`` to plot.
        results: Frame of per-model results passed on to the plot routine.

    Raises:
        ValueError: If ``type`` is not a known plot kind. Previously an
            unknown kind was ignored, which produced an empty figure.
    """
    if type == "roc":
        plot_roc_auc_curve(model, results)
    elif type == "confusion_matrix":
        plot_confusion_matrix(model, results)
    else:
        raise ValueError(
            f"Unknown plot type {type!r}; expected 'roc' or 'confusion_matrix'"
        )
|
||||
|
||||
|
||||
def plot_individual_figure(results, type, x_axis, y_axis, fig_title):
    """Build one figure covering every model, save it, then display it.

    The image is written to ``docs/assets/<title>.png``, where ``<title>`` is
    ``fig_title`` lower-cased with spaces replaced by underscores.

    Args:
        results: Frame indexed by model name; one plot call is made per index
            label.
        type: Plot kind forwarded to ``choose_plot_type``.
        x_axis: Label for the x axis.
        y_axis: Label for the y axis.
        fig_title: Figure title, also used to derive the output file name.
    """
    from pathlib import Path

    fig = figure(figsize=(8, 6))
    for model in results.index:
        choose_plot_type(type, model, results)
    xlabel(x_axis)
    ylabel(y_axis)
    title(fig_title)
    output_path = Path("docs/assets") / f"{fig_title.replace(' ', '_').lower()}.png"
    # Create the target directory on demand instead of failing on a fresh
    # checkout that does not have it yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Save before show(): show() can block until the window is closed, and
    # the image should not depend on the state of the figure afterwards.
    fig.savefig(output_path)
    show()
|
||||
|
||||
|
||||
# TODO Add cross_val_score
|
||||
def plot_all_figures(results):
    """Render and save every summary figure for the evaluated models.

    Args:
        results: Frame indexed by model name, forwarded unchanged to
            ``plot_individual_figure`` for each figure.
    """
    set_theme()
    plot_individual_figure(
        results,
        type="roc",
        x_axis="fpr",
        y_axis="tpr",
        fig_title="ROC AUC curve",
    )
    # The confusion matrix axes are class labels, not rates: the "fpr"/"tpr"
    # labels were copied over from the ROC figure. scikit-learn's
    # confusion_matrix puts true labels on rows and predictions on columns.
    plot_individual_figure(
        results,
        type="confusion_matrix",
        x_axis="Predicted label",
        y_axis="True label",
        fig_title="Confusion Matrix",
    )
|
||||
|
||||
|
||||
def evaluate_performance(confusion_matrix, accuracy, cv_score, auc):
    """Print a plain-text summary of a model's metrics to standard output.

    Args:
        confusion_matrix: Value printed on its own line after the
            "Confusion matrix: " heading.
        accuracy: Value printed after "Accuracy Score: ".
        cv_score: Value printed after "Cross validation score: ".
        auc: Value printed after "AUC: ".
    """
    print(f"Accuracy Score: {accuracy}")
    print("Confusion matrix: ")
    print(f"{confusion_matrix}")
    print(f"Cross validation score: {cv_score}")
    print(f"AUC: {auc}")
|
||||
def create_result_dataframes():
    """Create two empty result frames indexed by ``model``.

    Returns:
        A pair of independent, empty frames with the columns ``fpr``, ``tpr``,
        ``auc`` and ``confusion_matrix`` and an index named ``model``.
    """
    results = DataFrame(columns=["model", "fpr", "tpr", "auc", "confusion_matrix"])
    indexed_results = results.set_index("model")
    # Hand out a copy as the second frame: returning the same object twice
    # would let an in-place change to one silently show up in the other.
    return indexed_results, indexed_results.copy()
|
||||
|
||||
|
||||
def populate_results(df, model, fpr, tpr, auc, confusion_matrix):
    """Return ``df`` extended by one row describing a model's results.

    Args:
        df: Frame to extend; it is not modified.
        model: Model whose string form is translated to a short name by
            ``rename_model``.
        fpr: Value stored in the ``fpr`` column.
        tpr: Value stored in the ``tpr`` column.
        auc: Value stored in the ``auc`` column.
        confusion_matrix: Value stored in the ``confusion_matrix`` column.

    Returns:
        A new frame holding the rows of ``df`` followed by the new row, with
        the index renumbered.
    """
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    from pandas import concat

    row = {
        "model": rename_model(model=f"{model}"),
        "fpr": fpr,
        "tpr": tpr,
        "auc": auc,
        "confusion_matrix": confusion_matrix,
    }
    # Wrapping the dict in a list makes it a single row, so array-valued
    # entries stay in one cell each.
    return concat([df, DataFrame([row])], ignore_index=True)
|
||||
|
||||
|
||||
def rename_model(model):
    """Translate an estimator's string form into its short model name.

    Args:
        model: String form of the estimator, e.g. ``"GaussianNB()"``.

    Returns:
        The short name, one of ``gnb``, ``svc``, ``knn``, ``tree`` or
        ``neuralnet``.

    Raises:
        KeyError: If ``model`` is not one of the known string forms.
    """
    short_names = {
        "GaussianNB()": "gnb",
        "LinearSVC(random_state=42)": "svc",
        "KNeighborsClassifier(n_neighbors=10)": "knn",
        "DecisionTreeClassifier(random_state=42)": "tree",
        "MLPClassifier(hidden_layer_sizes=10)": "neuralnet",
    }
    try:
        return short_names[model]
    except KeyError:
        # Same exception type as before, but say what was looked up.
        raise KeyError(f"No short name registered for model {model!r}") from None
|
||||
|
||||
|
||||
def usage():
    """Print the command-line help and terminate the program.

    Raises:
        SystemExit: Always, with exit status 1, because this is only reached
            when the program was invoked incorrectly.
    """
    # A space is needed after the program name; without it the placeholder
    # was glued onto the script path.
    print("Usage: " + argv[0] + " <preprocessing action>")
    print("preprocessing actions:")
    print("fill: fills the na values with the mean")
    print("drop: drops the na values")
    print("models:")
    print("gnb: Gaussian Naive Bayes")
    print("svc: Linear Support Vector Classification")
    print("knn: K-neighbors")
    print("tree: Decision tree")
    print("neuralnet: MLP Classifier")
    # Raise SystemExit directly instead of calling the interactive exit()
    # helper, and report the bad invocation with a non-zero status.
    raise SystemExit(1)
|
||||
|
||||
|
||||
def main():
    """Evaluate every supported model on the dataset and plot the results.

    Expects exactly one command-line argument, the preprocessing action that
    is passed to ``parse_data``; otherwise the usage text is shown.
    """
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    from pandas import concat

    models = ["gnb", "svc", "knn", "tree", "neuralnet"]
    if len(argv) != 2:
        usage()
    data, target = parse_data(source="data/mamografia.csv", action=str(argv[1]))
    individual_result, complete_results = create_result_dataframes()
    # Each call starts from the same empty frame, so every returned frame
    # holds exactly one model's row.
    per_model_results = [
        predict_data(data=data, target=target, model=model, results=individual_result)
        for model in models
    ]
    # Combine all rows in one step instead of appending frame by frame.
    complete_results = concat(
        [complete_results, *per_model_results], ignore_index=True
    )
    indexed_results = complete_results.set_index("model")
    plot_all_figures(results=indexed_results)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue