diff --git a/src/processing.py b/src/processing.py
index 32fdd99..514230f 100644
--- a/src/processing.py
+++ b/src/processing.py
@@ -1,5 +1,5 @@
 from numpy import mean
-from sklearn.metrics import confusion_matrix, accuracy_score
+from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
 from sklearn.model_selection import cross_val_score
 from sklearn.naive_bayes import GaussianNB
 from sklearn.neighbors import KNeighborsClassifier
@@ -27,29 +27,33 @@ def predict_data(data, target, model):
     data = scale(data)
     accuracy_scores = []
     confusion_matrices = []
+    auc_scores = []
     for train_index, test_index in split_k_sets(data):
         model.fit(data.iloc[train_index], target.iloc[train_index])
         prediction = model.predict(data.iloc[test_index])
         accuracy_scores.append(accuracy_score(target.iloc[test_index], prediction))
         confusion_matrices.append(confusion_matrix(target.iloc[test_index], prediction))
+        auc_scores.append(roc_auc_score(target.iloc[test_index], model.predict_proba(data.iloc[test_index])[:, 1]))
     cv_score = cross_val_score(model, data, target, cv=10)
     evaluate_performance(
         confusion_matrix=mean(confusion_matrices, axis=0),
         accuracy=mean(accuracy_scores),
         cv_score=mean(cv_score),
+        auc=mean(auc_scores),
     )
 
 
-def evaluate_performance(confusion_matrix, accuracy, cv_score):
+def evaluate_performance(confusion_matrix, accuracy, cv_score, auc):
     print("Accuracy Score: " + str(accuracy))
     print("Confusion matrix: ")
     print(str(confusion_matrix))
     print("Cross validation score: " + str(cv_score))
+    print("AUC: " + str(auc))
 
 
 def main():
     data, target = parse_data(source="data/mamografia.csv", action="drop")
-    predict_data(data=data, target=target, model="svc")
+    predict_data(data=data, target=target, model="gnb")
 
 
 if __name__ == "__main__":