# Patch summary for src/processing.py (commentary only; patch tools skip
# text that precedes the first "diff --git" header).
#
# * Imports: sklearn.metrics now provides accuracy_score instead of
#   classification_report; confusion_matrix is still imported.
# * predict_data: instead of collecting raw per-fold predictions, each fold
#   is scored in place with accuracy_score and confusion_matrix. After the
#   loop it runs cross_val_score(model, data, target, cv=10) and passes the
#   averaged results to evaluate_performance. The old
#   "return model, predictions" is removed, so the function no longer
#   returns those values.
# * evaluate_performance: new signature (confusion_matrix, accuracy,
#   cv_score); it only prints the values, converting each with str().
# * main: reads "data/mamografia.csv" with action="drop" (was
#   "../data/mamografia.csv" with action="fill"), selects model "svc"
#   (was "knn"), and calls predict_data only.
#
# NOTE(review): `if model == "knn"` is evaluated after
#   `model = choose_model(model)`. If choose_model returns an estimator
#   object rather than the name, the scale() branch never runs - confirm.
# NOTE(review): mean(confusion_matrices, axis=0) presumably needs every
#   fold's matrix to have the same shape; verify each test fold contains
#   all classes.
diff --git a/src/processing.py b/src/processing.py
index 925c001..32fdd99 100644
--- a/src/processing.py
+++ b/src/processing.py
@@ -1,5 +1,5 @@
 from numpy import mean
-from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.metrics import confusion_matrix, accuracy_score
 from sklearn.model_selection import cross_val_score
 from sklearn.naive_bayes import GaussianNB
 from sklearn.neighbors import KNeighborsClassifier
@@ -25,31 +25,31 @@ def predict_data(data, target, model):
     model = choose_model(model)
     if model == "knn":
         data = scale(data)
-    predictions = []
+    accuracy_scores = []
+    confusion_matrices = []
     for train_index, test_index in split_k_sets(data):
         model.fit(data.iloc[train_index], target.iloc[train_index])
         prediction = model.predict(data.iloc[test_index])
-        predictions.append(prediction)
-    return model, predictions
+        accuracy_scores.append(accuracy_score(target.iloc[test_index], prediction))
+        confusion_matrices.append(confusion_matrix(target.iloc[test_index], prediction))
+    cv_score = cross_val_score(model, data, target, cv=10)
+    evaluate_performance(
+        confusion_matrix=mean(confusion_matrices, axis=0),
+        accuracy=mean(accuracy_scores),
+        cv_score=mean(cv_score),
+    )
 
 
-def evaluate_performance(predictions, model, data, target):
-    confusion_matrices = []
-    classification_reports = []
-    score = cross_val_score(model, data, target, cv=10)
-    for prediction in predictions:
-        confusion_matrices.append(confusion_matrix(target, prediction))
-        classification_reports.append(classification_report(target, prediction))
-    print("Model:" + model)
-    print("Score: " + score)
-    print("Confusion matrix: " + mean(confusion_matrices))
-    print("Classification report: " + mean(classification_reports))
+def evaluate_performance(confusion_matrix, accuracy, cv_score):
+    print("Accuracy Score: " + str(accuracy))
+    print("Confusion matrix: ")
+    print(str(confusion_matrix))
+    print("Cross validation score: " + str(cv_score))
 
 
 def main():
-    data, target = parse_data(source="../data/mamografia.csv", action="fill")
-    model, predictions = predict_data(data=data, target=target, model="knn")
-    evaluate_performance(predictions=predictions, model=model, data=data, target=target)
+    data, target = parse_data(source="data/mamografia.csv", action="drop")
+    predict_data(data=data, target=target, model="svc")
 
 
 if __name__ == "__main__":