Add files from previous lab

2021-04-29 12:33:46 +02:00 · 2021-04-29 12:33:46 +02:00 · 103a5bdfb0
commit 103a5bdfb0
parent bca17330ae
5 changed files with 217 additions and 2 deletions
--- a/shell.nix
+++ b/shell.nix
@ -1,9 +1,12 @@
-{ pkgs ? import <nixpkgs> {} }:
+{ pkgs ? import <nixpkgs> { } }:
 with pkgs;
 mkShell {
  buildInputs = [
-
+    python39
    python39Packages.numpy
    python39Packages.pandas
    python39Packages.XlsxWriter
  ];
 }
--- a/src/execution.py
+++ b/src/execution.py
@ -0,0 +1,100 @@
 from glob import glob
 from subprocess import run
 from sys import executable
 from numpy import mean, std
 from pandas import DataFrame, ExcelWriter
 def file_list(path):
    """Return the file names matching the glob pattern ``path`` as a list."""
    return list(glob(path))
 def create_dataframes():
    """Create the two empty result tables.

    Returns:
        A ``(greedy, local)`` tuple of two distinct, empty DataFrames.
    """
    return DataFrame(), DataFrame()
 def process_output(results):
    """Collect the distances and execution times reported in captured output.

    Args:
        results: Iterable of runs; each run is an iterable of ``bytes`` lines.

    Returns:
        A ``(distances, times)`` tuple of float lists. A line starting with
        ``Total distance:`` contributes to the first list and a line starting
        with ``Execution time:`` to the second; the value is the text between
        the first and second ``:`` of the line.
    """
    distance_tag = b"Total distance:"
    time_tag = b"Execution time:"

    def value_of(raw_line):
        # float() accepts bytes directly and ignores surrounding whitespace.
        return float(raw_line.split(b":")[1])

    distances = []
    timings = []
    for run_lines in results:
        for raw_line in run_lines:
            if raw_line.startswith(distance_tag):
                distances.append(value_of(raw_line))
            if raw_line.startswith(time_tag):
                timings.append(value_of(raw_line))
    return distances, timings
 def _summary_row(dataset, distances, times):
    """Build one result row: dataset name plus mean/std of distance and time."""
    return {
        "dataset": dataset.removeprefix("data/"),
        "media distancia": mean(distances),
        "desviacion distancia": std(distances),
        "media tiempo": mean(times),
        "desviacion tiempo": std(times),
    }


 def _append_row(frame, row):
    """Return a new DataFrame holding the rows of ``frame`` followed by ``row``."""
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # concat is its replacement. Imported here so the module-level import
    # list does not have to change.
    from pandas import concat

    return concat([frame, DataFrame([row])], ignore_index=True)


 def populate_dataframes(greedy, local, greedy_list, local_list, dataset):
    """Add one summary row per algorithm for ``dataset``.

    Args:
        greedy: DataFrame accumulating the greedy results.
        local: DataFrame accumulating the local search results.
        greedy_list: Captured output of the greedy runs, in the format
            accepted by ``process_output``.
        local_list: Captured output of the local search runs, same format.
        dataset: Dataset path; a leading ``data/`` is stripped for the row.

    Returns:
        A ``(greedy, local)`` tuple of new DataFrames, each with one extra
        row containing the mean and standard deviation of the distances and
        execution times. The input frames are not modified.
    """
    greedy_distances, greedy_time = process_output(greedy_list)
    local_distances, local_time = process_output(local_list)
    greedy = _append_row(greedy, _summary_row(dataset, greedy_distances, greedy_time))
    local = _append_row(local, _summary_row(dataset, local_distances, local_time))
    return greedy, local
 def script_execution(filenames, greedy, local, iterations=3):
    """Run both algorithms on every dataset and accumulate their statistics.

    For each dataset, ``src/main.py`` is launched with the current
    interpreter ``iterations`` times per algorithm (``greedy`` first, then
    ``local``, alternating). The captured standard output of every run is
    handed to ``populate_dataframes``.

    Args:
        filenames: Iterable of dataset paths.
        greedy: DataFrame accumulating the greedy results.
        local: DataFrame accumulating the local search results.
        iterations: Number of runs per algorithm and dataset.

    Returns:
        The ``(greedy, local)`` DataFrames after processing every dataset.
    """
    script = "src/main.py"

    def captured_lines(dataset, algorithm):
        completed = run([executable, script, dataset, algorithm], capture_output=True)
        return completed.stdout.splitlines()

    for dataset in filenames:
        print(f"Running on dataset {dataset}")
        greedy_runs, local_runs = [], []
        for _ in range(iterations):
            greedy_runs.append(captured_lines(dataset, "greedy"))
            local_runs.append(captured_lines(dataset, "local"))
        greedy, local = populate_dataframes(
            greedy, local, greedy_runs, local_runs, dataset
        )
    return greedy, local
 def export_results(greedy, local, path="docs/algorithm-results.xlsx"):
    """Write both result tables to an Excel workbook, one sheet per algorithm.

    Args:
        greedy: DataFrame written to the ``Greedy`` sheet.
        local: DataFrame written to the ``Local search`` sheet.
        path: Destination workbook; defaults to the original fixed location.

    Each column is widened to its longest cell (or its header, whichever is
    longer) plus a margin of 5.
    """
    dataframes = {"Greedy": greedy, "Local search": local}
    # The context manager saves and closes the workbook even if writing
    # fails; ExcelWriter.save() no longer exists in pandas 2.0.
    with ExcelWriter(path=path, engine="xlsxwriter") as writer:
        for name, df in dataframes.items():
            df.to_excel(writer, sheet_name=name, index=False)
            worksheet = writer.sheets[name]
            for index, column in enumerate(df):
                series = df[column]
                header_length = len(str(series.name))
                if series.empty:
                    # .max() of an empty Series is NaN, which is not a
                    # usable column width; fall back to the header length.
                    max_length = header_length
                else:
                    max_length = max(
                        series.astype(str).str.len().max(), header_length
                    )
                worksheet.set_column(index, index, width=max_length + 5)
 def main():
    """Benchmark every ``data/*.txt`` dataset and export the results."""
    datasets = file_list(path="data/*.txt")
    empty_greedy, empty_local = create_dataframes()
    results = script_execution(datasets, empty_greedy, empty_local)
    # results is the (greedy, local) pair returned by script_execution.
    export_results(*results)


 if __name__ == "__main__":
    main()
--- a/src/genetic_algorithm.py
+++ b/src/genetic_algorithm.py
@ -0,0 +1,50 @@
 from numpy.random import choice, seed
 def get_first_random_solution(m, data):
    """Pick ``m`` distinct rows of ``data`` as the starting solution.

    NumPy's global random generator is re-seeded with 42 on every call, so
    the same rows are selected for the same inputs; note that this also
    resets the global random state for later callers.

    Args:
        m: Number of rows to select. Sampling is without replacement, so it
            must not exceed the number of rows in ``data``.
        data: DataFrame to sample from.

    Returns:
        A DataFrame with the selected rows, keeping their original index
        labels.
    """
    seed(42)
    random_positions = choice(len(data.index), size=m, replace=False)
    # choice() yields positions in range(len(data)), so select by position;
    # label-based .loc only matched when the index was the default 0..n-1.
    return data.iloc[random_positions]
 def element_in_dataframe(solution, element):
    """Tell whether ``element`` already appears in ``solution``.

    A row matches when it has the same ``source``/``destination`` pair as
    ``element``, in either orientation.

    Args:
        solution: DataFrame with ``source`` and ``destination`` columns.
        element: Object exposing ``source`` and ``destination`` attributes
            (for example a row Series).

    Returns:
        ``True`` if at least one row matches, ``False`` otherwise.
    """
    # Boolean masks compare values directly; building a query string from
    # the values only worked for identifiers that are valid numeric literals.
    sources = solution["source"]
    destinations = solution["destination"]
    same_direction = (sources == element.source) & (
        destinations == element.destination
    )
    reversed_direction = (sources == element.destination) & (
        destinations == element.source
    )
    return bool((same_direction | reversed_direction).any())
 def replace_worst_element(previous, data):
    """Swap the lowest-distance row of a solution for a random new row.

    Works on a copy of ``previous``. Random rows are drawn from ``data``
    until one is found that ``element_in_dataframe`` does not report as
    already present; the loop has no other exit.

    Args:
        previous: DataFrame holding the current solution.
        data: DataFrame of candidate rows.

    Returns:
        A ``(solution, worst_index)`` tuple: the modified copy and the index
        label of the row that was replaced.
    """
    solution = previous.copy()
    distances = solution["distance"].astype(float)
    worst_index = distances.idxmin()
    while True:
        candidate = data.sample().squeeze()
        if not element_in_dataframe(solution=solution, element=candidate):
            break
    solution.loc[worst_index] = candidate
    return solution, worst_index
 def get_random_solution(previous, data):
    """Produce a neighbour of ``previous`` with a better replaced row.

    The lowest-distance row of ``previous`` is replaced via
    ``replace_worst_element``; replacement is repeated on the result until
    the distance stored at that row's index label is strictly greater than
    the distance it had in ``previous``.

    Args:
        previous: DataFrame holding the current solution.
        data: DataFrame of candidate rows.

    Returns:
        The new solution DataFrame.
    """
    candidate, replaced_at = replace_worst_element(previous, data)
    threshold = previous["distance"].loc[replaced_at]
    while candidate["distance"].loc[replaced_at] <= threshold:
        candidate, _ = replace_worst_element(previous=candidate, data=data)
    return candidate
 def explore_neighbourhood(element, data, max_iterations=100000):
    """Walk ``max_iterations`` steps through successive neighbours.

    Each step replaces the current solution with the result of
    ``get_random_solution``. Only the most recent solution is kept, because
    earlier ones are never read again.

    Args:
        element: Starting solution.
        data: DataFrame of candidate rows.
        max_iterations: Number of steps to take.

    Returns:
        The solution reached after the last step, or ``element`` itself when
        ``max_iterations`` is zero or negative.
    """
    current = element
    for _ in range(max_iterations):
        current = get_random_solution(previous=current, data=data)
    return current
 def local_search(m, data, max_iterations=100):
    """Run the local search starting from a random solution of ``m`` rows.

    Args:
        m: Number of rows in a solution.
        data: DataFrame of candidate rows.
        max_iterations: Number of neighbourhood steps; defaults to the
            previously fixed value of 100.

    Returns:
        The solution reached after exploring the neighbourhood.
    """
    first_solution = get_first_random_solution(m=m, data=data)
    return explore_neighbourhood(
        element=first_solution, data=data, max_iterations=max_iterations
    )
--- a/src/main.py
+++ b/src/main.py
@ -0,0 +1,47 @@
 from preprocessing import parse_file
 from greedy import greedy_algorithm
 from local_search import local_search
 from sys import argv
 from time import time
 def execute_algorithm(choice, n, m, data):
    """Run the algorithm named by ``choice`` and return its result.

    Args:
        choice: ``"greedy"`` or ``"local"``.
        n: First header value of the dataset (passed to the greedy algorithm).
        m: Second header value of the dataset (passed to both algorithms).
        data: Parsed dataset.

    Returns:
        Whatever the selected algorithm returns.

    Raises:
        SystemExit: With exit status 1, after printing a hint, when
            ``choice`` is not a valid algorithm name.
    """
    if choice == "greedy":
        return greedy_algorithm(n, m, data)
    if choice == "local":
        return local_search(m, data)
    print("The valid algorithm choices are 'greedy' and 'local'")
    # The bare exit() helper is injected by the site module for interactive
    # use and is not guaranteed to exist; raising SystemExit always works.
    raise SystemExit(1)
 def show_results(solutions, time_delta):
    """Print the solution, its total distance and the elapsed time.

    Args:
        solutions: DataFrame with a ``distance`` column.
        time_delta: Elapsed time to report.

    A ``No duplicates found`` line is printed only when no row of
    ``solutions`` is a duplicate of another.
    """
    total = solutions["distance"].sum()
    has_duplicates = solutions.duplicated().any()
    print(solutions)
    print(f"Total distance: {total!s}")
    if not has_duplicates:
        print("No duplicates found")
    print(f"Execution time: {time_delta!s}")
 def usage(argv):
    """Print the command-line help and terminate with exit status 1.

    Args:
        argv: Argument list; its first item is shown as the script name.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    print(f"Usage: python {argv[0]} <file> <algorithm choice>")
    print("algorithm choices:")
    print("greedy: greedy algorithm")
    print("local: local search algorithm")
    # The bare exit() helper is injected by the site module for interactive
    # use and is not guaranteed to exist; raising SystemExit always works.
    raise SystemExit(1)
 def main():
    """Parse the command line, run the chosen algorithm and print the results.

    Expects exactly two arguments after the script name: the dataset file
    and the algorithm choice. Otherwise the usage text is shown.
    """
    if len(argv) != 3:
        usage(argv)
    filename, algorithm = argv[1], argv[2]
    n, m, data = parse_file(filename)
    # Only the algorithm itself is timed, not the parsing or the printing.
    started = time()
    solutions = execute_algorithm(choice=algorithm, n=n, m=m, data=data)
    elapsed = time() - started
    show_results(solutions, time_delta=elapsed)


 if __name__ == "__main__":
    main()
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@ -0,0 +1,15 @@
 from pandas import read_table
 def read_header(filename):
    """Read the two integers on the first line of ``filename``.

    The first line is split on whitespace; its first two fields are
    converted to ``int`` and returned as a tuple.
    """
    with open(filename, "r") as handle:
        fields = handle.readline().split()
    # Convert in order so a malformed first field is reported first.
    return tuple(int(fields[position]) for position in (0, 1))
 def parse_file(filename):
    """Load a dataset file.

    The first line is the header handled by ``read_header``; the remaining
    lines are read as a space-separated table with the columns ``source``,
    ``destination`` and ``distance``.

    Returns:
        An ``(n, m, df)`` tuple: the two header integers and the DataFrame.
    """
    n, m = read_header(filename)
    columns = ["source", "destination", "distance"]
    # Line 0 is the header already consumed by read_header.
    table = read_table(filename, sep=" ", skiprows=[0], names=columns)
    return n, m, table