Add processing module
This commit is contained in:
parent
1cd11452fc
commit
0421493eff
|
@ -1 +1,76 @@
|
||||||
from preprocessing import parse_data
|
from numpy import mean
|
||||||
|
from sklearn.metrics import classification_report, confusion_matrix
|
||||||
|
from sklearn.model_selection import cross_val_score
|
||||||
|
from sklearn.naive_bayes import GaussianNB
|
||||||
|
from sklearn.neighbors import KNeighborsClassifier
|
||||||
|
from sklearn.preprocessing import scale
|
||||||
|
from sklearn.svm import LinearSVC
|
||||||
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
|
||||||
|
from preprocessing import parse_data, split_k_sets
|
||||||
|
|
||||||
|
|
||||||
|
def naive_bayes():
    """Return a new, unfitted ``GaussianNB`` classifier with default settings."""
    return GaussianNB()
|
||||||
|
|
||||||
|
|
||||||
|
def linear_svc():
    """Return a new, unfitted ``LinearSVC`` classifier.

    The random state is pinned to 42 so repeated runs are reproducible.
    """
    return LinearSVC(random_state=42)
|
||||||
|
|
||||||
|
|
||||||
|
def k_nearest_neighbors():
    """Return a new, unfitted ``KNeighborsClassifier`` using 10 neighbors."""
    return KNeighborsClassifier(n_neighbors=10)
|
||||||
|
|
||||||
|
|
||||||
|
def decision_tree():
    """Return a new, unfitted ``DecisionTreeClassifier``.

    The random state is pinned to 42 so repeated runs are reproducible.
    """
    return DecisionTreeClassifier(random_state=42)
|
||||||
|
|
||||||
|
|
||||||
|
def choose_model(model):
    """Build the classifier identified by a short key.

    Args:
        model: One of ``"gnb"`` (Gaussian naive Bayes), ``"svc"`` (linear
            SVC), ``"knn"`` (k-nearest neighbors) or ``"tree"`` (decision
            tree).

    Returns:
        A new, unfitted estimator produced by the matching factory function.

    Raises:
        ValueError: If ``model`` is not one of the supported keys.
    """
    if model == "gnb":
        return naive_bayes()
    if model == "svc":
        return linear_svc()
    if model == "knn":
        return k_nearest_neighbors()
    if model == "tree":
        return decision_tree()
    # Fail loudly here: silently returning None only defers the error to the
    # first ``.fit`` call, where the cause is much harder to spot.
    raise ValueError(
        "Unknown model {!r}; expected one of 'gnb', 'svc', 'knn', 'tree'".format(model)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def predict_data(data, target, model):
    """Fit the chosen classifier on each fold and collect its predictions.

    Args:
        data: Feature table; must support positional ``.iloc`` indexing
            (presumably a pandas DataFrame -- confirm against ``parse_data``).
        target: Labels aligned row-by-row with ``data``; must support ``.iloc``.
        model: Classifier key understood by ``choose_model``
            (e.g. ``"gnb"``, ``"svc"``, ``"knn"``, ``"tree"``).

    Returns:
        A ``(estimator, predictions)`` tuple. ``predictions`` holds one array
        of predicted labels per fold, in the order the folds are yielded by
        ``split_k_sets(data)``. ``estimator`` is the single estimator object
        that was refit on every fold, so it is left fitted on the last fold's
        training split (or unfitted if no folds were produced).
    """
    import pandas as pd  # local import: pandas is not imported at module level

    # Keep the key and the estimator in separate names. The original rebound
    # ``model`` to the estimator, so the ``model == "knn"`` check below could
    # never be true and the features were never scaled.
    estimator = choose_model(model)

    if model == "knn":
        # ``scale`` returns a bare ndarray; rewrap it so ``.iloc`` keeps
        # working and the row/column labels are preserved.
        data = pd.DataFrame(scale(data), index=data.index, columns=data.columns)

    predictions = []
    for train_index, test_index in split_k_sets(data):
        estimator.fit(data.iloc[train_index], target.iloc[train_index])
        predictions.append(estimator.predict(data.iloc[test_index]))

    return estimator, predictions
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_performance(predictions, model, data, target):
    """Print cross-validation and per-fold metrics for a classifier.

    Args:
        predictions: One array of predicted labels per fold, in the order the
            folds are yielded by ``split_k_sets(data)``.
        model: The scikit-learn estimator to score with 10-fold
            ``cross_val_score``.
        data: Feature table passed to ``cross_val_score`` and ``split_k_sets``.
        target: Labels aligned row-by-row with ``data``; must support
            positional ``.iloc`` indexing.

    Raises:
        ValueError: If a fold's prediction count does not match the size of
            that fold's test split.
    """
    import numpy as np  # local import: the module only imports ``mean`` from numpy

    scores = cross_val_score(model, data, target, cv=10)

    # Fix the label order so every fold's confusion matrix has the same shape,
    # even when a fold happens to miss one of the classes.
    labels = np.unique(target)

    confusion_matrices = []
    true_parts = []
    predicted_parts = []
    # Each prediction only covers its own test fold, so it must be compared
    # against that fold's slice of ``target`` rather than the whole column.
    # NOTE(review): this re-derives the folds and therefore assumes
    # ``split_k_sets`` yields the same splits, in the same order, as when the
    # predictions were produced -- confirm it is deterministic.
    for (_, test_index), prediction in zip(split_k_sets(data), predictions):
        fold_target = target.iloc[test_index]
        if len(fold_target) != len(prediction):
            raise ValueError(
                "Fold has {} test rows but {} predictions".format(
                    len(fold_target), len(prediction)
                )
            )
        confusion_matrices.append(
            confusion_matrix(fold_target, prediction, labels=labels)
        )
        true_parts.append(np.asarray(fold_target))
        predicted_parts.append(np.asarray(prediction))

    # ``str + estimator`` / ``str + ndarray`` raise TypeError, so format instead.
    print("Model: {}".format(model))
    print("Score: {:.4f} (per fold: {})".format(scores.mean(), scores))

    if not confusion_matrices:
        print("No fold predictions to evaluate.")
        return

    # axis=0 averages the matrices cell-by-cell; without it ``mean`` collapses
    # everything into a single scalar.
    print("Confusion matrix:\n{}".format(mean(confusion_matrices, axis=0)))
    # ``classification_report`` returns text, which cannot be averaged; build
    # one report over the pooled out-of-fold predictions instead.
    print(
        "Classification report:\n{}".format(
            classification_report(
                np.concatenate(true_parts), np.concatenate(predicted_parts)
            )
        )
    )
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the k-nearest-neighbors experiment end to end.

    Loads ``../data/mamografia.csv`` through ``parse_data`` (with
    ``action="fill"``), produces per-fold predictions with the ``"knn"``
    model, and prints the evaluation.
    """
    features, labels = parse_data(source="../data/mamografia.csv", action="fill")
    estimator, fold_predictions = predict_data(
        data=features, target=labels, model="knn"
    )
    evaluate_performance(
        predictions=fold_predictions,
        model=estimator,
        data=features,
        target=labels,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Run the experiment only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||||
|
|
Loading…
Reference in New Issue