From 103a5bdfb05753434608b4f2d6082603566cf45d Mon Sep 17 00:00:00 2001
From: coolneng
Date: Thu, 29 Apr 2021 12:33:46 +0200
Subject: [PATCH] Add files from previous lab

---
# Review notes. Text between the "---" line above and the first "diff --git"
# line is commentary only; it is not part of the commit.
#
# * src/execution.py runs src/main.py once per algorithm ("greedy", "local")
#   for every file matching data/*.txt, repeated `iterations` times
#   (default 3). It parses the "Total distance:" and "Execution time:" lines
#   from the captured stdout and writes the per-dataset mean and standard
#   deviation to docs/algorithm-results.xlsx, one sheet per algorithm.
# * src/main.py imports `greedy_algorithm` from `greedy` and `local_search`
#   from `local_search`. This patch creates neither module; the
#   `local_search` function it adds lives in src/genetic_algorithm.py.
#   NOTE(review): confirm both modules already exist in the tree.
# * explore_neighbourhood() returns `neighbour`, a name bound only inside its
#   loop, so max_iterations=0 would raise UnboundLocalError. The only call in
#   this patch passes max_iterations=100.
# * get_first_random_solution() calls seed(42) before drawing its indexes, so
#   the starting selection is the same on every run for a given input.
# * populate_dataframes() uses str.removeprefix (Python 3.9+, matching the
#   python39 added to shell.nix) and DataFrame.append; export_results() uses
#   ExcelWriter.save. Both pandas calls were removed in pandas 2.0.
# * NOTE(review): `import { }` in shell.nix, the `From:` header without an
#   address, and the usage string ending in "{argv[0]} " all look as if an
#   angle-bracketed token was lost from this copy of the patch. Verify
#   against the original commit before applying.

 shell.nix                |   7 ++-
 src/execution.py         | 100 +++++++++++++++++++++++++++++++++++++++
 src/genetic_algorithm.py |  50 ++++++++++++++++++++
 src/main.py              |  47 ++++++++++++++++++
 src/preprocessing.py     |  15 ++++++
 5 files changed, 217 insertions(+), 2 deletions(-)
 create mode 100644 src/execution.py
 create mode 100644 src/genetic_algorithm.py
 create mode 100755 src/main.py
 create mode 100644 src/preprocessing.py

diff --git a/shell.nix b/shell.nix
index 81c823e..107409e 100644
--- a/shell.nix
+++ b/shell.nix
@@ -1,9 +1,12 @@
-{ pkgs ? import {} }:
+{ pkgs ? import { } }:
 
 with pkgs;
 
 mkShell {
   buildInputs = [
-
+    python39
+    python39Packages.numpy
+    python39Packages.pandas
+    python39Packages.XlsxWriter
   ];
 }
diff --git a/src/execution.py b/src/execution.py
new file mode 100644
index 0000000..a52cb36
--- /dev/null
+++ b/src/execution.py
@@ -0,0 +1,100 @@
+from glob import glob
+from subprocess import run
+from sys import executable
+
+from numpy import mean, std
+from pandas import DataFrame, ExcelWriter
+
+
+def file_list(path):
+    file_list = []
+    for fname in glob(path):
+        file_list.append(fname)
+    return file_list
+
+
+def create_dataframes():
+    greedy = DataFrame()
+    local = DataFrame()
+    return greedy, local
+
+
+def process_output(results):
+    distances = []
+    time = []
+    for element in results:
+        for line in element:
+            if line.startswith(bytes("Total distance:", encoding="utf-8")):
+                line_elements = line.split(sep=bytes(":", encoding="utf-8"))
+                distances.append(float(line_elements[1]))
+            if line.startswith(bytes("Execution time:", encoding="utf-8")):
+                line_elements = line.split(sep=bytes(":", encoding="utf-8"))
+                time.append(float(line_elements[1]))
+    return distances, time
+
+
+def populate_dataframes(greedy, local, greedy_list, local_list, dataset):
+    greedy_distances, greedy_time = process_output(greedy_list)
+    local_distances, local_time = process_output(local_list)
+    greedy_dict = {
+        "dataset": dataset.removeprefix("data/"),
+        "media distancia": mean(greedy_distances),
+        "desviacion distancia": std(greedy_distances),
+        "media tiempo": mean(greedy_time),
+        "desviacion tiempo": std(greedy_time),
+    }
+    local_dict = {
+        "dataset": dataset.removeprefix("data/"),
+        "media distancia": mean(local_distances),
+        "desviacion distancia": std(local_distances),
+        "media tiempo": mean(local_time),
+        "desviacion tiempo": std(local_time),
+    }
+    greedy = greedy.append(greedy_dict, ignore_index=True)
+    local = local.append(local_dict, ignore_index=True)
+    return greedy, local
+
+
+def script_execution(filenames, greedy, local, iterations=3):
+    script = "src/main.py"
+    for dataset in filenames:
+        print(f"Running on dataset {dataset}")
+        greedy_list = []
+        local_list = []
+        for _ in range(iterations):
+            greedy_cmd = run(
+                [executable, script, dataset, "greedy"], capture_output=True
+            ).stdout.splitlines()
+            local_cmd = run(
+                [executable, script, dataset, "local"], capture_output=True
+            ).stdout.splitlines()
+            greedy_list.append(greedy_cmd)
+            local_list.append(local_cmd)
+        greedy, local = populate_dataframes(
+            greedy, local, greedy_list, local_list, dataset
+        )
+    return greedy, local
+
+
+def export_results(greedy, local):
+    dataframes = {"Greedy": greedy, "Local search": local}
+    writer = ExcelWriter(path="docs/algorithm-results.xlsx", engine="xlsxwriter")
+    for name, df in dataframes.items():
+        df.to_excel(writer, sheet_name=name, index=False)
+        worksheet = writer.sheets[name]
+        for index, column in enumerate(df):
+            series = df[column]
+            max_length = max(series.astype(str).str.len().max(), len(str(series.name)))
+            worksheet.set_column(index, index, width=max_length + 5)
+    writer.save()
+
+
+def main():
+    datasets = file_list(path="data/*.txt")
+    greedy, local = create_dataframes()
+    populated_greedy, populated_local = script_execution(datasets, greedy, local)
+    export_results(populated_greedy, populated_local)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/genetic_algorithm.py b/src/genetic_algorithm.py
new file mode 100644
index 0000000..3567ba6
--- /dev/null
+++ b/src/genetic_algorithm.py
@@ -0,0 +1,50 @@
+from numpy.random import choice, seed
+
+
+def get_first_random_solution(m, data):
+    seed(42)
+    random_indexes = choice(len(data.index), size=m, replace=False)
+    return data.loc[random_indexes]
+
+
+def element_in_dataframe(solution, element):
+    duplicates = solution.query(
+        f"(source == {element.source} and destination == {element.destination}) or (source == {element.destination} and destination == {element.source})"
+    )
+    return not duplicates.empty
+
+
+def replace_worst_element(previous, data):
+    solution = previous.copy()
+    worst_index = solution["distance"].astype(float).idxmin()
+    random_element = data.sample().squeeze()
+    while element_in_dataframe(solution=solution, element=random_element):
+        random_element = data.sample().squeeze()
+    solution.loc[worst_index] = random_element
+    return solution, worst_index
+
+
+def get_random_solution(previous, data):
+    solution, worst_index = replace_worst_element(previous, data)
+    previous_worst_distance = previous["distance"].loc[worst_index]
+    while solution.distance.loc[worst_index] <= previous_worst_distance:
+        solution, _ = replace_worst_element(previous=solution, data=data)
+    return solution
+
+
+def explore_neighbourhood(element, data, max_iterations=100000):
+    neighbourhood = []
+    neighbourhood.append(element)
+    for _ in range(max_iterations):
+        previous_solution = neighbourhood[-1]
+        neighbour = get_random_solution(previous=previous_solution, data=data)
+        neighbourhood.append(neighbour)
+    return neighbour
+
+
+def local_search(m, data):
+    first_solution = get_first_random_solution(m=m, data=data)
+    best_solution = explore_neighbourhood(
+        element=first_solution, data=data, max_iterations=100
+    )
+    return best_solution
diff --git a/src/main.py b/src/main.py
new file mode 100755
index 0000000..f70aaa1
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,47 @@
+from preprocessing import parse_file
+from greedy import greedy_algorithm
+from local_search import local_search
+from sys import argv
+from time import time
+
+
+def execute_algorithm(choice, n, m, data):
+    if choice == "greedy":
+        return greedy_algorithm(n, m, data)
+    elif choice == "local":
+        return local_search(m, data)
+    else:
+        print("The valid algorithm choices are 'greedy' and 'local'")
+        exit(1)
+
+
+def show_results(solutions, time_delta):
+    distance_sum = solutions["distance"].sum()
+    duplicates = solutions.duplicated().any()
+    print(solutions)
+    print("Total distance: " + str(distance_sum))
+    if not duplicates:
+        print("No duplicates found")
+    print("Execution time: " + str(time_delta))
+
+
+def usage(argv):
+    print(f"Usage: python {argv[0]} ")
+    print("algorithm choices:")
+    print("greedy: greedy algorithm")
+    print("local: local search algorithm")
+    exit(1)
+
+
+def main():
+    if len(argv) != 3:
+        usage(argv)
+    n, m, data = parse_file(argv[1])
+    start_time = time()
+    solutions = execute_algorithm(choice=argv[2], n=n, m=m, data=data)
+    end_time = time()
+    show_results(solutions, time_delta=end_time - start_time)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/preprocessing.py b/src/preprocessing.py
new file mode 100644
index 0000000..bbd4d0d
--- /dev/null
+++ b/src/preprocessing.py
@@ -0,0 +1,15 @@
+from pandas import read_table
+
+
+def read_header(filename):
+    with open(filename, "r") as f:
+        header = f.readline().split()
+        return int(header[0]), int(header[1])
+
+
+def parse_file(filename):
+    n, m = read_header(filename)
+    df = read_table(
+        filename, names=["source", "destination", "distance"], sep=" ", skiprows=[0]
+    )
+    return n, m, df