Move each algorithm into a different module

2021-04-15 22:07:44 +02:00 · 2021-04-15 22:07:44 +02:00 · 1f2fde1abf
commit 1f2fde1abf
parent b584239d6e
4 changed files with 144 additions and 142 deletions
--- a/src/greedy.py
+++ b/src/greedy.py
@@ -0,0 +1,55 @@
+from pandas import DataFrame, Series
+
+
+def get_first_solution(n, data):
+    distance_sum = DataFrame(columns=["point", "distance"])
+    for element in range(n):
+        element_df = data.query(f"source == {element} or destination == {element}")
+        distance = element_df["distance"].sum()
+        distance_sum = distance_sum.append(
+            {"point": element, "distance": distance}, ignore_index=True
+        )
+    furthest_index = distance_sum["distance"].astype(float).idxmax()
+    furthest_row = distance_sum.iloc[furthest_index]
+    furthest_row["distance"] = 0
+    return furthest_row
+
+
+def get_different_element(original, row):
+    if row.source == original:
+        return row.destination
+    return row.source
+
+
+def get_closest_element(element, data):
+    element_df = data.query(f"source == {element} or destination == {element}")
+    closest_index = element_df["distance"].astype(float).idxmin()
+    closest_row = data.loc[closest_index]
+    closest_point = get_different_element(original=element, row=closest_row)
+    return Series(data={"point": closest_point, "distance": closest_row["distance"]})
+
+
+def explore_solutions(solutions, data):
+    closest_elements = solutions["point"].apply(func=get_closest_element, data=data)
+    furthest_index = closest_elements["distance"].astype(float).idxmax()
+    return closest_elements.iloc[furthest_index]
+
+
+def remove_duplicates(current, previous, data):
+    duplicate_free_df = data.query(
+        f"(source != {current} or destination not in @previous) and (source not in @previous or destination != {current})"
+    )
+    return duplicate_free_df
+
+
+def greedy_algorithm(n, m, data):
+    solutions = DataFrame(columns=["point", "distance"])
+    first_solution = get_first_solution(n, data)
+    solutions = solutions.append(first_solution, ignore_index=True)
+    for _ in range(m):
+        element = explore_solutions(solutions, data)
+        solutions = solutions.append(element)
+        data = remove_duplicates(
+            current=element["point"], previous=solutions["point"], data=data
+        )
+    return solutions
--- a/src/local_search.py
+++ b/src/local_search.py
@@ -0,0 +1,42 @@
+from numpy.random import choice, randint, seed
+
+
+def get_first_random_solution(m, data):
+    seed(42)
+    random_indexes = choice(len(data.index), size=m)
+    return data.iloc[random_indexes]
+
+
+def replace_worst_element(previous, data):
+    solution = previous.copy()
+    worst_index = previous["distance"].astype(float).idxmin()
+    random_candidate = data.loc[randint(low=0, high=len(data.index))]
+    solution.loc[worst_index] = random_candidate
+    return solution
+
+
+def get_random_solution(previous, data):
+    solution = replace_worst_element(previous, data)
+    while solution["distance"].sum() <= previous["distance"].sum():
+        if solution.equals(previous):
+            break
+        solution = replace_worst_element(previous=solution, data=data)
+    return solution
+
+
+def explore_neighbourhood(element, data, max_iterations=100000):
+    neighbourhood = []
+    neighbourhood.append(element)
+    for _ in range(max_iterations):
+        previous_solution = neighbourhood[-1]
+        neighbour = get_random_solution(previous=previous_solution, data=data)
+        if neighbour.equals(previous_solution):
+            break
+        neighbourhood.append(neighbour)
+    return neighbour
+
+
+def local_search(m, data):
+    first_solution = get_first_random_solution(m=m, data=data)
+    best_solution = explore_neighbourhood(element=first_solution, data=data)
+    return best_solution
--- a/src/main.py
+++ b/src/main.py
@@ -0,0 +1,47 @@
+from preprocessing import parse_file
+from greedy import greedy_algorithm
+from local_search import local_search
+from sys import argv
+from time import time
+
+
+def execute_algorithm(choice, n, m, data):
+    if choice == "greedy":
+        return greedy_algorithm(n, m, data)
+    elif choice == "local":
+        return local_search(m, data)
+    else:
+        print("The valid algorithm choices are 'greedy' and 'local'")
+        exit(1)
+
+
+def show_results(solutions, time_delta):
+    distance_sum = solutions["distance"].sum()
+    duplicates = solutions.duplicated().any()
+    print(solutions)
+    print("Total distance: " + str(distance_sum))
+    if not duplicates:
+        print("No duplicates found")
+    print("Execution time: " + str(time_delta))
+
+
+def usage(argv):
+    print(f"Usage: python {argv[0]} <file> <algorithm choice>")
+    print("algorithm choices:")
+    print("greedy: greedy algorithm")
+    print("local: local search algorithm")
+    exit(1)
+
+
+def main():
+    if len(argv) != 3:
+        usage(argv)
+    n, m, data = parse_file(argv[1])
+    start_time = time()
+    solutions = execute_algorithm(choice=argv[2], n=n, m=m, data=data)
+    end_time = time()
+    show_results(solutions, time_delta=end_time - start_time)
+
+
+if __name__ == "__main__":
+    main()
--- a/src/processing.py
+++ b/src/processing.py
@@ -1,142 +0,0 @@
-from preprocessing import parse_file
-from numpy.random import choice, randint, seed
-from pandas import DataFrame, Series
-from sys import argv
-from time import time
-
-
-def get_first_solution(n, data):
-    distance_sum = DataFrame(columns=["point", "distance"])
-    for element in range(n):
-        element_df = data.query(f"source == {element} or destination == {element}")
-        distance = element_df["distance"].sum()
-        distance_sum = distance_sum.append(
-            {"point": element, "distance": distance}, ignore_index=True
-        )
-    furthest_index = distance_sum["distance"].astype(float).idxmax()
-    furthest_row = distance_sum.iloc[furthest_index]
-    furthest_row["distance"] = 0
-    return furthest_row
-
-
-def get_different_element(original, row):
-    if row.source == original:
-        return row.destination
-    return row.source
-
-
-def get_closest_element(element, data):
-    element_df = data.query(f"source == {element} or destination == {element}")
-    closest_index = element_df["distance"].astype(float).idxmin()
-    closest_row = data.loc[closest_index]
-    closest_point = get_different_element(original=element, row=closest_row)
-    return Series(data={"point": closest_point, "distance": closest_row["distance"]})
-
-
-def explore_solutions(solutions, data):
-    closest_elements = solutions["point"].apply(func=get_closest_element, data=data)
-    furthest_index = closest_elements["distance"].astype(float).idxmax()
-    return closest_elements.iloc[furthest_index]
-
-
-def remove_duplicates(current, previous, data):
-    duplicate_free_df = data.query(
-        f"(source != {current} or destination not in @previous) and (source not in @previous or destination != {current})"
-    )
-    return duplicate_free_df
-
-
-def greedy_algorithm(n, m, data):
-    solutions = DataFrame(columns=["point", "distance"])
-    first_solution = get_first_solution(n, data)
-    solutions = solutions.append(first_solution, ignore_index=True)
-    for _ in range(m):
-        element = explore_solutions(solutions, data)
-        solutions = solutions.append(element)
-        data = remove_duplicates(
-            current=element["point"], previous=solutions["point"], data=data
-        )
-    return solutions
-
-
-def get_first_random_solution(m, data):
-    seed(42)
-    random_indexes = choice(len(data.index), size=m)
-    return data.iloc[random_indexes]
-
-
-def replace_worst_element(previous, data):
-    solution = previous.copy()
-    worst_index = previous["distance"].astype(float).idxmin()
-    random_candidate = data.loc[randint(low=0, high=len(data.index))]
-    solution.loc[worst_index] = random_candidate
-    return solution
-
-
-def get_random_solution(previous, data):
-    solution = replace_worst_element(previous, data)
-    while solution["distance"].sum() <= previous["distance"].sum():
-        if solution.equals(previous):
-            break
-        solution = replace_worst_element(previous=solution, data=data)
-    return solution
-
-
-def explore_neighbourhood(element, data, max_iterations=100000):
-    neighbourhood = []
-    neighbourhood.append(element)
-    for _ in range(max_iterations):
-        previous_solution = neighbourhood[-1]
-        neighbour = get_random_solution(previous=previous_solution, data=data)
-        if neighbour.equals(previous_solution):
-            break
-        neighbourhood.append(neighbour)
-    return neighbour
-
-
-def local_search(m, data):
-    first_solution = get_first_random_solution(m=m, data=data)
-    best_solution = explore_neighbourhood(element=first_solution, data=data)
-    return best_solution
-
-
-def execute_algorithm(choice, n, m, data):
-    if choice == "greedy":
-        return greedy_algorithm(n, m, data)
-    elif choice == "local":
-        return local_search(m, data)
-    else:
-        print("The valid algorithm choices are 'greedy' and 'local'")
-        exit(1)
-
-
-def show_results(solutions, time_delta):
-    distance_sum = solutions["distance"].sum()
-    duplicates = solutions.duplicated().any()
-    print(solutions)
-    print("Total distance: " + str(distance_sum))
-    if not duplicates:
-        print("No duplicates found")
-    print("Execution time: " + str(time_delta))
-
-
-def usage(argv):
-    print(f"Usage: python {argv[0]} <file> <algorithm choice>")
-    print("algorithm choices:")
-    print("greedy: greedy algorithm")
-    print("local: local search algorithm")
-    exit(1)
-
-
-def main():
-    if len(argv) != 3:
-        usage(argv)
-    n, m, data = parse_file(argv[1])
-    start_time = time()
-    solutions = execute_algorithm(choice=argv[2], n=n, m=m, data=data)
-    end_time = time()
-    show_results(solutions, time_delta=end_time - start_time)
-
-
-if __name__ == "__main__":
-    main()