Represent a solution as a DataFrame of points instead of a boolean mask.

Summary of the change below:
  * generate_first_solution now takes `data` and returns a DataFrame with
    "point" and "distance" columns; the NumPy boolean mask, the fixed
    seed(42) call and the get_genotype helper are removed.
  * get_row_distance / compute_distance are added to look up and sum the
    distances between one point and the other points of a solution.
  * Crossover helpers are added: select_random_genes, repair_offspring,
    get_matching_genes, uniform_crossover, position_crossover, crossover.
  * replace_worst_element, get_random_solution and explore_neighbourhood
    gain an `n` parameter and draw replacement points with randint(n).

Review notes on the added code (left untouched so the patch stays faithful):
  * position_crossover only builds `genotypes` and has no return statement,
    so crossover() yields None for any mode other than "uniform"; crossover()
    also passes `m` to a parameter that position_crossover names `n`.
  * repair_offspring and uniform_crossover call DataFrame.append /
    Series.append, which were removed in pandas 2.0.
  * replace_worst_element writes through `solution["point"].loc[...]`
    (chained indexing), which may not update `solution` under pandas
    copy-on-write - TODO confirm against the pandas version in use.
  * "worst" is picked with idxmin() in replace_worst_element while
    repair_offspring picks "best" with idxmax() - confirm both match the
    intended objective.

diff --git a/src/genetic_algorithm.py b/src/genetic_algorithm.py
index 90f68fe..bbf8dd8 100644
--- a/src/genetic_algorithm.py
+++ b/src/genetic_algorithm.py
@@ -1,23 +1,40 @@
-from numpy import put, sum, append, where, zeros
-from numpy.random import choice, seed
+from numpy import sum, append, arange, delete, where
+from numpy.random import randint, choice
+from pandas import DataFrame
 
 
-def generate_first_solution(n, m):
-    seed(42)
-    solution = zeros(shape=n, dtype=bool)
-    random_indices = choice(n, size=m, replace=False)
-    put(solution, ind=random_indices, v=True)
+def get_row_distance(source, destination, data):
+    row = data.query(
+        """(source == @source and destination == @destination) or \
+        (source == @destination and destination == @source)"""
+    )
+    return row["distance"].values[0]
+
+
+def compute_distance(element, solution, data):
+    accumulator = 0
+    distinct_elements = solution.query(f"point != {element}")
+    for _, item in distinct_elements.iterrows():
+        accumulator += get_row_distance(
+            source=element,
+            destination=item.point,
+            data=data,
+        )
+    return accumulator
+
+
+def generate_first_solution(n, m, data):
+    solution = DataFrame(columns=["point", "distance"])
+    solution["point"] = choice(n, size=m, replace=False)
+    solution["distance"] = solution["point"].apply(
+        func=compute_distance, solution=solution, data=data
+    )
     return solution
 
 
-def get_genotype(element):
-    genotype = where(element == True)
-    return genotype[0]
-
-
 def evaluate_element(element, data):
     fitness = []
-    genotype = get_genotype(element)
+    genotype = element.point.values
     distances = data.query(f"source in @genotype and destination in @genotype")
     for item in genotype[:-1]:
         element_df = distances.query(f"source == {item} or destination == {item}")
@@ -27,44 +44,95 @@ def evaluate_element(element, data):
     return sum(fitness)
 
 
+def select_random_genes(matching_genes, parents, m):
+    cutoff = randint(m)
+    distinct_genes = delete(arange(m), matching_genes)
+    first_parent_genes = parents[0].point.iloc[distinct_genes[cutoff:]]
+    second_parent_genes = parents[1].point.iloc[distinct_genes[:cutoff]]
+    return first_parent_genes, second_parent_genes
+
+
+def repair_offspring(offspring, parents, m):
+    while len(offspring) != m:
+        if len(offspring) > m:
+            best_index = offspring["distance"].astype(float).idxmax()
+            offspring.drop(index=best_index, inplace=True)
+        elif len(offspring) < m:
+            random_parent = parents[randint(len(parents))]
+            best_index = random_parent["distance"].astype(float).idxmax()
+            best_point = random_parent["point"].loc[best_index]
+            offspring = offspring.append(
+                {"point": best_point, "distance": 0}, ignore_index=True
+            )
+    return offspring
+
+
+def get_matching_genes(parents):
+    first_parent = parents[0].point
+    second_parent = parents[1].point
+    return where(first_parent == second_parent)
+
+
+def uniform_crossover(parents, m):
+    offspring = DataFrame(columns=["point", "distance"])
+    matching_genes = get_matching_genes(parents)
+    offspring["point"] = parents[0].point.iloc[matching_genes]
+    first_genes, second_genes = select_random_genes(matching_genes, parents, m)
+    offspring["point"] = offspring["point"].append(first_genes)
+    offspring["point"] = offspring["point"].append(second_genes)
+    offspring["distance"] = 0
+    viable_offspring = repair_offspring(offspring, parents, m)
+    return viable_offspring
+
+
+def position_crossover(parents, n):
+    genotypes = [parents[0].point.values, parents[1].point.values]
+
+
+def crossover(mode, parents, m):
+    if mode == "uniform":
+        return uniform_crossover(parents, m)
+    return position_crossover(parents, m)
+
+
 def element_in_dataframe(solution, element):
-    duplicates = solution.query(
-        f"(source == {element.source} and destination == {element.destination}) or (source == {element.destination} and destination == {element.source})"
-    )
+    duplicates = solution.query(f"point == {element}")
     return not duplicates.empty
 
 
-def replace_worst_element(previous, data):
+def replace_worst_element(previous, n, data):
     solution = previous.copy()
     worst_index = solution["distance"].astype(float).idxmin()
-    random_element = data.sample().squeeze()
+    random_element = randint(n)
     while element_in_dataframe(solution=solution, element=random_element):
-        random_element = data.sample().squeeze()
-    solution.loc[worst_index] = random_element
-    return solution, worst_index
-
-
-def get_random_solution(previous, data):
-    solution, worst_index = replace_worst_element(previous, data)
-    previous_worst_distance = previous["distance"].loc[worst_index]
-    while solution.distance.loc[worst_index] <= previous_worst_distance:
-        solution, _ = replace_worst_element(previous=solution, data=data)
+        random_element = randint(n)
+    solution["point"].loc[worst_index] = random_element
+    solution["distance"].loc[worst_index] = compute_distance(
+        element=solution["point"].loc[worst_index], solution=solution, data=data
+    )
     return solution
 
 
-def explore_neighbourhood(element, data, max_iterations=100000):
+def get_random_solution(previous, n, data):
+    solution = replace_worst_element(previous, n, data)
+    while solution["distance"].sum() <= previous["distance"].sum():
+        solution = replace_worst_element(previous=solution, n=n, data=data)
+    return solution
+
+
+def explore_neighbourhood(element, n, data, max_iterations=100000):
     neighbourhood = []
     neighbourhood.append(element)
     for _ in range(max_iterations):
         previous_solution = neighbourhood[-1]
-        neighbour = get_random_solution(previous=previous_solution, data=data)
+        neighbour = get_random_solution(previous=previous_solution, n=n, data=data)
         neighbourhood.append(neighbour)
     return neighbour
 
 
 def genetic_algorithm(n, m, data):
-    first_solution = generate_first_solution(n=n, m=m)
+    first_solution = generate_first_solution(n, m, data)
     best_solution = explore_neighbourhood(
-        element=first_solution, data=data, max_iterations=100
+        element=first_solution, n=n, data=data, max_iterations=100
     )
     return best_solution