"""Train scikit-learn classifiers with k-fold splits and report their metrics.

Provenance: src/processing.py as introduced by commit
0421493eff729fe30595244f0a2b81b609c9b505 ("Add processing module",
coolneng, Tue, 10 Nov 2020 20:28:59 +0100).
"""
from numpy import mean, unique
from pandas import DataFrame, Series
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import scale
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

from preprocessing import parse_data, split_k_sets


def naive_bayes():
    """Return an unfitted Gaussian naive Bayes classifier."""
    model = GaussianNB()
    return model


def linear_svc():
    """Return an unfitted linear SVC with a fixed random seed."""
    model = LinearSVC(random_state=42)
    return model


def k_nearest_neighbors():
    """Return an unfitted k-nearest-neighbours classifier using 10 neighbours."""
    model = KNeighborsClassifier(n_neighbors=10)
    return model


def decision_tree():
    """Return an unfitted decision tree classifier with a fixed random seed."""
    model = DecisionTreeClassifier(random_state=42)
    return model


# Short model name -> factory that builds the matching unfitted estimator.
_MODEL_FACTORIES = {
    "gnb": naive_bayes,
    "svc": linear_svc,
    "knn": k_nearest_neighbors,
    "tree": decision_tree,
}


def choose_model(model):
    """Build the estimator registered under the short name *model*.

    Args:
        model: One of "gnb", "svc", "knn" or "tree".

    Returns:
        A new, unfitted scikit-learn estimator.

    Raises:
        ValueError: If *model* is not a known name. Previously the function
            fell through and returned None, which only failed later with an
            unrelated AttributeError on ``None.fit``.
    """
    try:
        factory = _MODEL_FACTORIES[model]
    except KeyError:
        known = ", ".join(sorted(_MODEL_FACTORIES))
        raise ValueError(
            f"Unknown model {model!r}; expected one of: {known}"
        ) from None
    return factory()


def predict_data(data, target, model):
    """Fit the chosen model on every training fold and predict its test fold.

    Args:
        data: Feature table; must support ``.iloc`` (presumably a pandas
            DataFrame as returned by ``parse_data`` -- confirm).
        target: Labels aligned row-by-row with *data*; must support ``.iloc``.
        model: Short model name accepted by ``choose_model``.

    Returns:
        A tuple ``(estimator, predictions)``. ``estimator`` is left fitted on
        the last fold. ``predictions`` holds one pandas Series per fold whose
        index contains the positional row numbers of that fold's test rows,
        so the matching ground truth can be recovered later.

    Raises:
        ValueError: If *model* is not a known name.
    """
    estimator = choose_model(model)
    # Compare the *name*: the original rebound ``model`` to the estimator
    # first, so this check was never true and KNN ran on unscaled features.
    if model == "knn":
        # scale() returns a bare ndarray; re-wrap it so ``.iloc`` keeps working.
        # The whole table is scaled at once, before splitting into folds.
        data = DataFrame(scale(data), index=data.index, columns=data.columns)
    predictions = []
    for train_index, test_index in split_k_sets(data):
        estimator.fit(data.iloc[train_index], target.iloc[train_index])
        fold_prediction = estimator.predict(data.iloc[test_index])
        # Keep the test positions with the prediction for later alignment.
        predictions.append(Series(fold_prediction, index=test_index))
    return estimator, predictions


def _expected_labels(target, prediction):
    """Return the part of *target* that *prediction* was produced for."""
    if isinstance(prediction, Series):
        # Series built by predict_data carry positional row numbers.
        return target.iloc[prediction.index.to_numpy()]
    # Plain arrays carry no row information: assume they cover every row.
    return target


def evaluate_performance(predictions, model, data, target):
    """Print cross-validation score, mean confusion matrix and fold reports.

    Args:
        predictions: Per-fold predictions as returned by ``predict_data``.
        model: The scikit-learn estimator to cross-validate.
        data: Feature table passed to ``cross_val_score``.
        target: Ground-truth labels for every row of *data*.

    Returns:
        None. All results are written to standard output.
    """
    # Fix the label order so every fold yields a matrix of the same shape,
    # even when a class is missing from a fold.
    labels = unique(target)
    confusion_matrices = []
    classification_reports = []
    # NOTE(review): this scores the estimator on *data* as passed in, which is
    # unscaled even for KNN -- confirm whether that is intended.
    score = cross_val_score(model, data, target, cv=10)
    for prediction in predictions:
        # Compare each fold only with its own rows; the full target has a
        # different length and made confusion_matrix raise ValueError.
        expected = _expected_labels(target, prediction)
        confusion_matrices.append(
            confusion_matrix(expected, prediction, labels=labels)
        )
        classification_reports.append(
            classification_report(expected, prediction)
        )
    # f-strings: ``str + estimator`` / ``str + ndarray`` raised TypeError.
    print(f"Model:{model}")
    print(f"Score: {score}")
    if confusion_matrices:
        # axis=0 averages element-wise across folds instead of collapsing
        # all matrices into a single scalar.
        print(f"Confusion matrix: {mean(confusion_matrices, axis=0)}")
    # Text reports cannot be averaged, so print one per fold.
    print("Classification report: " + "\n".join(classification_reports))


def main():
    """Parse the CSV, run the KNN pipeline and print its evaluation."""
    data, target = parse_data(source="../data/mamografia.csv", action="fill")
    model, predictions = predict_data(data=data, target=target, model="knn")
    evaluate_performance(
        predictions=predictions, model=model, data=data, target=target
    )


if __name__ == "__main__":
    main()