MH-P1/src/processing.py

67 lines
2.0 KiB
Python
Raw Normal View History

2021-03-22 17:57:25 +01:00
from preprocessing import parse_file
2021-04-11 22:07:57 +02:00
from pandas import DataFrame
2021-03-22 17:57:25 +01:00
from sys import argv
def get_first_solution(n, data):
    """Pick the starting point: the one with the largest total distance.

    For every point ``0 .. n - 1`` the ``distance`` values of all rows of
    ``data`` that mention the point as ``source`` or ``destination`` are
    summed, and the point with the largest sum is selected (the first one
    when several points tie).

    Args:
        n: Number of points; points are identified by ``0 .. n - 1``.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns. It is not modified.

    Returns:
        A Series with the entries ``point`` (the selected point) and
        ``distance`` (reset to 0).

    Raises:
        ValueError: If ``n`` is not positive, so there is nothing to select.
    """
    if n <= 0:
        raise ValueError("n must be positive to select a first solution")
    # Collect the rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for element in range(n):
        element_df = data.query(f"source == {element} or destination == {element}")
        records.append({"point": element, "distance": element_df["distance"].sum()})
    distance_sum = DataFrame(records, columns=["point", "distance"])
    furthest_index = distance_sum["distance"].idxmax()
    # idxmax returns an index label, so the row is looked up with .loc; the
    # copy keeps the reset below from writing back into distance_sum.
    furthest_row = distance_sum.loc[furthest_index].copy()
    furthest_row["distance"] = 0
    return furthest_row
2021-04-11 22:07:57 +02:00
def get_different_element(original, row):
    """Return the endpoint of ``row`` that is not ``original``.

    ``row`` must expose ``source`` and ``destination`` attributes. When
    ``row.source`` equals ``original`` the destination is returned; in every
    other case (including a row that does not mention ``original`` at all)
    the source is returned.
    """
    return row.destination if row.source == original else row.source
def get_furthest_element(element, data):
    """Find the point joined to ``element`` by the largest distance in ``data``.

    Args:
        element: Point to measure from.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns. Its index may have gaps, e.g. after the caller dropped
            rows; index labels are assumed to be unique.

    Returns:
        A tuple ``(furthest_element, furthest_index)``: a dict with the keys
        ``point`` and ``distance``, and the index label of the chosen row in
        ``data``.

    Raises:
        ValueError: If no row of ``data`` mentions ``element``.
    """
    element_df = data.query(f"source == {element} or destination == {element}")
    if element_df.empty:
        raise ValueError(f"no distances available for element {element}")
    furthest_index = element_df["distance"].idxmax()
    # idxmax yields an index *label*. Positional .iloc would select the wrong
    # row (or raise IndexError) once rows have been dropped from data, so the
    # row is fetched by label instead.
    furthest_row = data.loc[furthest_index]
    furthest_point = get_different_element(original=element, row=furthest_row)
    furthest_element = {"point": furthest_point, "distance": furthest_row["distance"]}
    return furthest_element, furthest_index
2021-04-11 22:07:57 +02:00
2021-03-22 17:57:25 +01:00
def greedy_algorithm(n, m, data):
    """Build a solution greedily by repeatedly moving to the furthest point.

    Starts from the point returned by ``get_first_solution`` and then, ``m``
    times, asks ``get_furthest_element`` for the point furthest from the most
    recently selected one. The row used in each step is dropped from the
    local view of ``data`` so it cannot be chosen again; the caller's frame
    is left untouched because ``drop`` returns a new frame.

    Args:
        n: Number of points, forwarded to ``get_first_solution``.
        m: Number of greedy steps performed after the starting point.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns.

    Returns:
        A DataFrame with the columns ``point`` and ``distance`` holding the
        starting point followed by one row per greedy step.
    """
    first_solution = get_first_solution(n, data)
    # Selections are gathered in a list and turned into a frame once at the
    # end: DataFrame.append was removed in pandas 2.0.
    selected = [
        {"point": first_solution["point"], "distance": first_solution["distance"]}
    ]
    # NOTE(review): the loop adds m points on top of the starting one, so the
    # result has m + 1 rows -- confirm that this is the intended size.
    for _ in range(m):
        # Read the last point as a scalar; int() on a one-element Series is
        # deprecated in recent pandas.
        last_point = int(selected[-1]["point"])
        centroid, index = get_furthest_element(element=last_point, data=data)
        selected.append(dict(centroid))
        data = data.drop(index)
    return DataFrame(selected, columns=["point", "distance"])
2021-03-22 17:57:25 +01:00
2021-04-11 22:07:57 +02:00
# NOTE In each step, switch to the element that gives the least amount
# (the note above is unfinished in the original; the quantity is not stated).
def local_search():
    """Placeholder for the local-search step; it is not implemented yet.

    Takes no arguments, does nothing and returns ``None``.
    """
    return None
2021-03-22 17:57:25 +01:00
def usage(argv):
    """Print the command-line usage message and terminate with status 1.

    Args:
        argv: Argument list; only ``argv[0]`` (the script name) is read.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print(f"Usage: python {argv[0]} <file>")
    # exit() is a helper injected by the site module for interactive use and
    # is missing under ``python -S`` or in frozen builds; raising SystemExit
    # directly terminates the program in every environment.
    raise SystemExit(1)
def main():
    """Command-line entry point: parse the input file and print the result.

    Exactly one argument (the input file path) is expected after the script
    name; with any other argument count ``usage`` is called. The parsed
    values are handed to ``greedy_algorithm`` and its result is printed.
    """
    argument_count = len(argv)
    if argument_count != 2:
        usage(argv)
    parsed = parse_file(argv[1])
    n, m, data = parsed
    print(greedy_algorithm(n, m, data))
2021-03-22 17:57:25 +01:00
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()