From 6fe18d594c45920a4b0ee212df084e3ef4adcecd Mon Sep 17 00:00:00 2001 From: coolneng Date: Fri, 11 Dec 2020 14:37:01 +0100 Subject: [PATCH] Fix sample size selection --- src/P2/processing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/P2/processing.py b/src/P2/processing.py index 39db65e..44fffaf 100644 --- a/src/P2/processing.py +++ b/src/P2/processing.py @@ -35,6 +35,7 @@ def predict_data(data, model, results, sample): labels=prediction, metric="euclidean", sample_size=sample, + random_state=42, ) populated_results = populate_results( df=results, @@ -80,8 +81,7 @@ def plot_scatter_plot(results): def print_dataframe(df): df.set_index("model") - output_df = df.filter["clusters", "silhouette", "calinski", "time"] - print(output_df) + print(df) def show_results(results): @@ -95,8 +95,8 @@ def show_results(results): def create_result_dataframes(): results = DataFrame( columns=[ - "clusters", "model", + "clusters", "prediction", "silhouette", "calinski-harabasz", @@ -156,7 +156,7 @@ def main(): models = ["kmeans", "birch", "affinity", "meanshift", "dbscan"] if len(argv) != 4: usage() - case, sample = argv[2], argv[3] + case, sample = argv[2], int(argv[3]) data = parse_data(source="data/accidentes_2013.csv", action=str(argv[1])) individual_result, complete_results = create_result_dataframes() case_data = construct_case(df=data, choice=case) @@ -171,8 +171,8 @@ def main(): complete_results = complete_results.append( individual_result.append(model_results) ) - indexed_results = complete_results.set_index("model") - show_results(results=indexed_results) + complete_results.set_index("model") + print_dataframe(df=complete_results) if __name__ == "__main__":