diff --git a/docs/Summary.org b/docs/Summary.org index b83f1d5..3e18e2f 100644 --- a/docs/Summary.org +++ b/docs/Summary.org @@ -75,6 +75,8 @@ def parse_data(source, action): return test_data, target_data #+end_src +#+RESULTS: + A continuación, mostraremos cada uno de los pasos que realizamos para obtener el /dataset/ final: @@ -203,3 +205,130 @@ Datos objetivo: 4 1 Name: Severity, dtype: int64 #+end_example +** Configuración de algoritmos + +Elegimos 5 algoritmos distintos: + +1. Naive Bayes +2. Linear Support Vector Classification +3. K Nearest Neighbors +4. Árbol de decisión +5. Perceptrón multicapa (red neuronal) + +Procedemos a evaluar el rendimiento de cada algoritmo, usando las siguientes métricas: + +- Accuracy score +- Matriz de confusión +- Cross validation score +- Area under the curve (AUC) + +Vamos a realizar 2 ejecuciones por algoritmo, para evaluar las diferencias que obtenemos según el preprocesado utilizado (eliminación de valores nulos o imputación). + +La implementación se encuentra en el archivo /processing.py/, cuyo contenido mostramos a continuación: + +#+begin_src python +from numpy import mean +from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score +from sklearn.model_selection import cross_val_score +from sklearn.naive_bayes import GaussianNB +from sklearn.neural_network import MLPClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.preprocessing import scale +from sklearn.svm import LinearSVC +from sklearn.tree import DecisionTreeClassifier + +from sys import argv + +from preprocessing import parse_data, split_k_sets + + +def choose_model(model): + if model == "gnb": + return GaussianNB() + elif model == "svc": + return LinearSVC(random_state=42) + elif model == "knn": + return KNeighborsClassifier(n_neighbors=10) + elif model == "tree": + return DecisionTreeClassifier(random_state=42) + elif model == "neuralnet": + return MLPClassifier(hidden_layer_sizes=10) + else: + print("Unknown model 
selected. The choices are: ") + print("gnb: Gaussian Naive Bayes") + print("svc: Linear Support Vector Classification") + print("knn: K-neighbors") + print("tree: Decision tree") + print("neuralnet: MLP Classifier") + exit() + + +def predict_data(data, target, model): + model = choose_model(model) + if model == "knn": + data = scale(data) + accuracy_scores = [] + confusion_matrices = [] + auc = [] + for train_index, test_index in split_k_sets(data): + model.fit(data.iloc[train_index], target.iloc[train_index]) + prediction = model.predict(data.iloc[test_index]) + accuracy_scores.append(accuracy_score(target.iloc[test_index], prediction)) + confusion_matrices.append(confusion_matrix(target.iloc[test_index], prediction)) + auc.append(roc_auc_score(target.iloc[test_index], prediction)) + cv_score = cross_val_score(model, data, target, cv=10) + evaluate_performance( + confusion_matrix=mean(confusion_matrices, axis=0), + accuracy=mean(accuracy_scores), + cv_score=mean(cv_score), + auc=mean(auc), + ) + + +def evaluate_performance(confusion_matrix, accuracy, cv_score, auc): + print("Accuracy Score: " + str(accuracy)) + print("Confusion matrix: ") + print(str(confusion_matrix)) + print("Cross validation score: " + str(cv_score)) + print("AUC: " + str(auc)) +#+end_src + +** Resultados obtenidos +*** Naive Bayes + +Los resultados que obtenemos son los siguientes: + +#+CAPTION: Naive Bayes +[[./assets/gnb.png]] +*** Linear SVC + +Los resultados que obtenemos son los siguientes: + +#+CAPTION: Linear SVC con eliminación +[[./assets/svc_drop.png]] + +#+CAPTION: Linear SVC con imputación +[[./assets/svc_fill.png]] +*** KNN +Antes de ejecutar este algoritmo, normalizamos los datos dado que el /KNN/ es un algoritmo basado en distancia. 
+ +Los resultados que obtenemos son los siguientes: + +#+CAPTION: KNN +[[./assets/knn.png]] +*** Árbol de decisión + +Los resultados que obtenemos son los siguientes: + +#+CAPTION: Árbol de decisión +[[./assets/tree.png]] +*** Perceptrón multicapa + +Los resultados que obtenemos son los siguientes: + +#+CAPTION: Perceptrón multicapa con eliminación +[[./assets/neuralnet_drop.png]] + +#+CAPTION: Perceptrón multicapa con imputación +[[./assets/neuralnet_fill.png]] +** Análisis de resultados diff --git a/docs/Summary.pdf b/docs/Summary.pdf index 9c22c93..3144065 100644 Binary files a/docs/Summary.pdf and b/docs/Summary.pdf differ diff --git a/docs/assets/gnb.png b/docs/assets/gnb.png new file mode 100644 index 0000000..6fdacd1 Binary files /dev/null and b/docs/assets/gnb.png differ diff --git a/docs/assets/knn.png b/docs/assets/knn.png new file mode 100644 index 0000000..7b9a2d6 Binary files /dev/null and b/docs/assets/knn.png differ diff --git a/docs/assets/neuralnet_drop.png b/docs/assets/neuralnet_drop.png new file mode 100644 index 0000000..c385737 Binary files /dev/null and b/docs/assets/neuralnet_drop.png differ diff --git a/docs/assets/neuralnet_fill.png b/docs/assets/neuralnet_fill.png new file mode 100644 index 0000000..bbb92e2 Binary files /dev/null and b/docs/assets/neuralnet_fill.png differ diff --git a/docs/assets/svc_drop.png b/docs/assets/svc_drop.png new file mode 100644 index 0000000..bedeba2 Binary files /dev/null and b/docs/assets/svc_drop.png differ diff --git a/docs/assets/svc_fill.png b/docs/assets/svc_fill.png new file mode 100644 index 0000000..86a2f96 Binary files /dev/null and b/docs/assets/svc_fill.png differ diff --git a/docs/assets/tree.png b/docs/assets/tree.png new file mode 100644 index 0000000..66f5874 Binary files /dev/null and b/docs/assets/tree.png differ