From 103a5bdfb05753434608b4f2d6082603566cf45d Mon Sep 17 00:00:00 2001
From: coolneng <akasroua@gmail.com>
Date: Thu, 29 Apr 2021 12:33:46 +0200
Subject: [PATCH] Add files from previous lab

---
 shell.nix                |   7 ++-
 src/execution.py         | 100 +++++++++++++++++++++++++++++++++++++++
 src/genetic_algorithm.py |  50 ++++++++++++++++++++
 src/main.py              |  47 ++++++++++++++++++
 src/preprocessing.py     |  15 ++++++
 5 files changed, 217 insertions(+), 2 deletions(-)
 create mode 100644 src/execution.py
 create mode 100644 src/genetic_algorithm.py
 create mode 100755 src/main.py
 create mode 100644 src/preprocessing.py
diff --git a/shell.nix b/shell.nix
index 81c823e..107409e 100644
--- a/shell.nix
+++ b/shell.nix
@@ -1,9 +1,12 @@
-{ pkgs ? import <nixpkgs> {} }:
+{ pkgs ? import <nixpkgs> { } }:
 
 with pkgs;
 
 mkShell {
   buildInputs = [
-
+    python39
+    python39Packages.numpy
+    python39Packages.pandas
+    python39Packages.XlsxWriter
   ];
 }
diff --git a/src/execution.py b/src/execution.py
new file mode 100644
index 0000000..a52cb36
--- /dev/null
+++ b/src/execution.py
@@ -0,0 +1,100 @@
+from glob import glob
+from subprocess import run
+from sys import executable
+
+from numpy import mean, std
+from pandas import DataFrame, ExcelWriter, concat
+
+
+def file_list(path):
+    """Return the list of filesystem paths that match the glob pattern ``path``."""
+    # glob() already builds a new list, so no element-by-element copy is needed;
+    # this also removes the local that shadowed the function's own name.
+    return glob(path)
+
+
+def create_dataframes():
+    """Return two new, empty DataFrames: (greedy results, local-search results)."""
+    # Two separate constructor calls, so the frames are distinct objects.
+    return DataFrame(), DataFrame()
+
+
+def process_output(results):
+    """Parse captured script output into distance and execution-time samples.
+
+    ``results`` is an iterable of runs, each an iterable of ``bytes`` lines.
+    Returns ``(distances, times)``: two lists of floats in encounter order.
+    """
+    lines = [line for run_lines in results for line in run_lines]
+    # bytes literals replace per-line bytes(...) calls; float() accepts bytes.
+    dist_tag, time_tag = b"Total distance:", b"Execution time:"
+    distances = [float(ln.split(b":")[1]) for ln in lines if ln.startswith(dist_tag)]
+    times = [float(ln.split(b":")[1]) for ln in lines if ln.startswith(time_tag)]
+    return distances, times
+
+
+def _summary_row(dataset, outputs):
+    """Build one results row (mean/std of distance and time) for ``dataset``."""
+    distances, times = process_output(outputs)
+    return {
+        "dataset": dataset.removeprefix("data/"),
+        "media distancia": mean(distances),
+        "desviacion distancia": std(distances),
+        "media tiempo": mean(times),
+        "desviacion tiempo": std(times),
+    }
+
+
+def populate_dataframes(greedy, local, greedy_list, local_list, dataset):
+    """Return new ``greedy`` and ``local`` frames with one summary row appended."""
+    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
+    greedy_row = DataFrame([_summary_row(dataset, greedy_list)])
+    local_row = DataFrame([_summary_row(dataset, local_list)])
+    greedy = concat([greedy, greedy_row], ignore_index=True)
+    local = concat([local, local_row], ignore_index=True)
+    return greedy, local
+
+
+def script_execution(filenames, greedy, local, iterations=3):
+    """Run src/main.py ``iterations`` times per dataset and per algorithm.
+
+    Returns the two frames with one summary row added for each dataset; raises
+    ``subprocess.CalledProcessError`` as soon as a run exits with non-zero status.
+    """
+    for dataset in filenames:
+        print(f"Running on dataset {dataset}")
+        outputs = {"greedy": [], "local": []}
+        for _ in range(iterations):
+            for algorithm, runs in outputs.items():
+                # check=True: stderr is captured, so a crash would otherwise go unseen.
+                command = [executable, "src/main.py", dataset, algorithm]
+                finished = run(command, capture_output=True, check=True)
+                runs.append(finished.stdout.splitlines())
+        greedy, local = populate_dataframes(
+            greedy, local, outputs["greedy"], outputs["local"], dataset
+        )
+    return greedy, local
+
+
+def export_results(greedy, local):
+    """Write both result frames to docs/algorithm-results.xlsx, one per sheet."""
+    dataframes = {"Greedy": greedy, "Local search": local}
+    # `with` saves and closes the workbook; ExcelWriter.save() is gone in pandas 2.
+    with ExcelWriter("docs/algorithm-results.xlsx", engine="xlsxwriter") as writer:
+        for name, df in dataframes.items():
+            df.to_excel(writer, sheet_name=name, index=False)
+            for index, column in enumerate(df):
+                # Fit each column to its longest cell or header, plus padding.
+                widest = max(df[column].astype(str).str.len().max(), len(str(column)))
+                writer.sheets[name].set_column(index, index, width=widest + 5)
+
+
+def main():
+    """Benchmark every data/*.txt dataset and export the summary to Excel."""
+    datasets = file_list(path="data/*.txt")
+    results = script_execution(datasets, *create_dataframes())
+    export_results(*results)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/genetic_algorithm.py b/src/genetic_algorithm.py
new file mode 100644
index 0000000..3567ba6
--- /dev/null
+++ b/src/genetic_algorithm.py
@@ -0,0 +1,50 @@
+from numpy.random import choice, seed
+
+
+def get_first_random_solution(m, data):
+    """Return ``m`` distinct rows of ``data``, drawn reproducibly (seed 42)."""
+    seed(42)
+    return data.iloc[choice(len(data.index), size=m, replace=False)]  # by position
+
+
+def element_in_dataframe(solution, element):
+    """Return True if the element's pair is already in ``solution`` (either order)."""
+    a, b = element.source, element.destination
+    src, dst = solution.source, solution.destination  # masks, not query strings
+    return bool((((src == a) & (dst == b)) | ((src == b) & (dst == a))).any())
+
+
+def replace_worst_element(previous, data):
+    """Swap the lowest-distance row of a copy of ``previous`` for a new random row."""
+    solution, candidate = previous.copy(), data.sample().squeeze()
+    while element_in_dataframe(solution=solution, element=candidate):
+        candidate = data.sample().squeeze()  # redraw: pair already in the solution
+    target = solution["distance"].astype(float).idxmin()
+    solution.loc[target] = candidate
+    return solution, target  # the new solution and the label of the replaced row
+
+
+def get_random_solution(previous, data):
+    """Redraw the worst row of ``previous`` until its distance strictly increases."""
+    neighbour, slot = replace_worst_element(previous, data)
+    while neighbour.distance.loc[slot] <= previous["distance"].loc[slot]:
+        neighbour = replace_worst_element(previous=neighbour, data=data)[0]
+    return neighbour
+
+
+def explore_neighbourhood(element, data, max_iterations=100000):
+    """Apply ``get_random_solution`` repeatedly, starting from ``element``.
+
+    Returns the last solution, or ``element`` itself if ``max_iterations`` is 0.
+    """
+    for _ in range(max_iterations):  # keep only the latest solution, no history
+        element = get_random_solution(previous=element, data=data)
+    return element
+
+
+def local_search(m, data):
+    """Return the solution reached after 100 neighbourhood steps."""
+    # Start from m randomly drawn rows of ``data``.
+    start = get_first_random_solution(m=m, data=data)
+    # The iteration budget is fixed here rather than using the helper's default.
+    return explore_neighbourhood(element=start, data=data, max_iterations=100)
diff --git a/src/main.py b/src/main.py
new file mode 100755
index 0000000..f70aaa1
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,47 @@
+from preprocessing import parse_file
+from greedy import greedy_algorithm
+from local_search import local_search
+from sys import argv
+from time import time
+
+
+def execute_algorithm(choice, n, m, data):
+    """Run the algorithm named by ``choice`` ("greedy" or "local"); else exit 1."""
+    if choice == "greedy":
+        return greedy_algorithm(n, m, data)
+    if choice == "local":
+        return local_search(m, data)
+    print("The valid algorithm choices are 'greedy' and 'local'")
+    raise SystemExit(1)  # site's exit() helper is meant for interactive use only
+
+
+def show_results(solutions, time_delta):
+    """Print the solution, its total distance, a duplicate note and the run time."""
+    total = solutions["distance"].sum()
+    report = [str(solutions), "Total distance: " + str(total)]
+    if not solutions.duplicated().any():
+        report.append("No duplicates found")
+    report.append("Execution time: " + str(time_delta))
+    print("\n".join(report))
+
+
+def usage(argv):
+    """Print command-line help for ``argv[0]`` and exit with status 1."""
+    print(f"Usage: python {argv[0]} <file> <algorithm choice>")
+    print("algorithm choices:", "greedy: greedy algorithm", sep="\n")
+    print("local: local search algorithm")
+    raise SystemExit(1)  # exit() is a site helper intended for interactive use
+
+
+def main():
+    """Parse the CLI arguments, time the chosen algorithm and print its results."""
+    if len(argv) != 3:
+        usage(argv)
+    n, m, data = parse_file(argv[1])
+    started = time()
+    solutions = execute_algorithm(choice=argv[2], n=n, m=m, data=data)
+    show_results(solutions, time_delta=time() - started)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/preprocessing.py b/src/preprocessing.py
new file mode 100644
index 0000000..bbd4d0d
--- /dev/null
+++ b/src/preprocessing.py
@@ -0,0 +1,15 @@
+from pandas import read_table
+
+
+def read_header(filename):
+    """Return the first two whitespace-separated fields of line 1 as ints."""
+    with open(filename, "r") as handle:
+        return int((fields := handle.readline().split())[0]), int(fields[1])
+
+
+def parse_file(filename):
+    """Return ``(n, m, frame)``: the header ints and the remaining rows as a table."""
+    n, m = read_header(filename)
+    columns = ["source", "destination", "distance"]
+    # Row 0 is the header already read above, so the table parser skips it.
+    return n, m, read_table(filename, names=columns, sep=" ", skiprows=[0])