"""Genetic-algorithm and local-search helpers built on pandas DataFrames.

Conventions used throughout this module (as established by the code below):

* ``data`` is a DataFrame with ``source``, ``destination`` and ``distance``
  columns; a pair of points is looked up in either direction.
* A *solution* (also called individual, parent or offspring) is a DataFrame
  with a ``point`` column and a ``distance`` column.
* ``n`` is the exclusive upper bound for point identifiers (points are drawn
  with ``randint(n)`` / ``choice(n, ...)``) and ``m`` is the number of points
  in a solution.
"""
from math import ceil

from numpy import sum, append, arange, delete, where
from numpy.random import randint, choice, shuffle, random
from pandas import DataFrame, concat


def get_row_distance(source, destination, data):
    """Return the distance stored in ``data`` between two points.

    The pair is matched in both directions (``source -> destination`` and
    ``destination -> source``); the first matching row wins.

    Raises:
        IndexError: if ``data`` holds no row for the pair.
    """
    sources = data["source"]
    destinations = data["destination"]
    forward = (sources == source) & (destinations == destination)
    backward = (sources == destination) & (destinations == source)
    return data.loc[forward | backward, "distance"].values[0]


def compute_distance(element, solution, data):
    """Sum the distances from ``element`` to every other point of ``solution``.

    Rows of ``solution`` whose ``point`` equals ``element`` are skipped.
    Returns ``0`` when there is no other point.
    """
    other_points = solution.loc[solution["point"] != element, "point"]
    total = 0
    for point in other_points:
        total += get_row_distance(source=element, destination=point, data=data)
    return total


def generate_first_solution(n, m, data):
    """Build a random solution of ``m`` distinct points drawn from ``range(n)``.

    Each row's ``distance`` is the sum of distances from that point to the
    other points of the solution.
    """
    solution = DataFrame({"point": choice(n, size=m, replace=False)})
    solution["distance"] = [
        compute_distance(element=point, solution=solution, data=data)
        for point in solution["point"]
    ]
    return solution[["point", "distance"]]


def evaluate_element(element, data):
    """Return a fitness value for the solution ``element``.

    Only rows of ``data`` whose both endpoints belong to the solution are
    considered.  For every point but the last, the largest remaining distance
    touching that point is added to the fitness, and all rows touching the
    point are then removed so they are not counted again.
    """
    genotype = element["point"].values
    inside = data["source"].isin(genotype) & data["destination"].isin(genotype)
    distances = data[inside]

    fitness = []
    for item in genotype[:-1]:
        touches = (distances["source"] == item) | (distances["destination"] == item)
        fitness.append(distances.loc[touches, "distance"].astype(float).max())
        distances = distances[~touches]
    # ``sum`` is numpy's sum (see imports): an empty list yields 0.0.
    return sum(fitness)


def select_distinct_genes(matching_genes, parents, m):
    """Split the non-matching positions between both parents at a random cutoff.

    Args:
        matching_genes: positions (not point values) shared by both parents;
            they are removed from ``range(m)`` with ``numpy.delete``.
        parents: pair of solutions.
        m: number of genes per solution.

    Returns:
        ``(first_parent_genes, second_parent_genes)``: the points of the first
        parent at the distinct positions from the cutoff onwards, and the
        points of the second parent at the distinct positions before it.
    """
    cutoff = randint(m)
    distinct_indexes = delete(arange(m), matching_genes)
    first_parent_genes = parents[0]["point"].iloc[distinct_indexes[cutoff:]]
    second_parent_genes = parents[1]["point"].iloc[distinct_indexes[:cutoff]]
    return first_parent_genes, second_parent_genes


def select_random_genes(matching_genes, parents, m):
    """Return the non-matching genes of a random parent, shuffled.

    ``matching_genes`` holds positions (not point values), exactly as in
    :func:`select_distinct_genes`.
    """
    random_parent = parents[randint(len(parents))]
    distinct_indexes = delete(arange(m), matching_genes)
    # Explicit copy so the in-place shuffle can never touch the parent.
    genes = random_parent["point"].iloc[distinct_indexes].to_numpy(copy=True)
    shuffle(genes)
    return genes


def _pick_donor_points(parents, taken, count):
    """Pick ``count`` new points from random parents, highest distance first.

    Works on copies of ``parents`` and skips points already in ``taken``.
    """
    donors = [parent.copy() for parent in parents]
    taken = set(taken)
    picked = []
    while len(picked) < count:
        available = [donor for donor in donors if not donor.empty]
        if not available:
            raise ValueError(
                "parents do not hold enough distinct points to repair the offspring"
            )
        donor = available[randint(len(available))]
        best_label = donor["distance"].astype(float).idxmax()
        best_point = donor.loc[best_label, "point"]
        donor.drop(index=best_label, inplace=True)
        if best_point not in taken:
            taken.add(best_point)
            picked.append(best_point)
    return picked


def repair_offspring(offspring, parents, m):
    """Return a copy of ``offspring`` trimmed or extended to exactly ``m`` rows.

    * Too long: the row with the highest ``distance`` is dropped repeatedly.
    * Too short: points are taken from randomly chosen parents, always the
      parent's remaining row with the highest ``distance``; they are appended
      with ``distance`` 0.  Points already present in the offspring are
      skipped so the result holds no duplicates.

    Neither ``offspring`` nor ``parents`` is modified.

    Raises:
        ValueError: if the parents run out of points before ``m`` is reached.
    """
    repaired = offspring.reset_index(drop=True)

    while len(repaired) > m:
        drop_label = repaired["distance"].astype(float).idxmax()
        repaired = repaired.drop(index=drop_label)

    missing = m - len(repaired)
    if missing > 0:
        new_points = _pick_donor_points(parents, repaired["point"], missing)
        new_rows = DataFrame({"point": new_points, "distance": 0})
        if repaired.empty:
            repaired = new_rows
        else:
            repaired = concat([repaired, new_rows], ignore_index=True)

    return repaired.reset_index(drop=True)


def get_matching_genes(parents):
    """Return the positions at which both parents hold the same point.

    The comparison is positional (index labels are ignored).  The result has
    the shape returned by ``numpy.where``: a one-element tuple holding the
    array of positions.
    """
    first_parent = parents[0]["point"].to_numpy()
    second_parent = parents[1]["point"].to_numpy()
    return where(first_parent == second_parent)


def populate_offspring(values):
    """Build an offspring from several gene collections.

    Args:
        values: iterable of gene collections (Series, arrays or lists of
            points); they are concatenated in order.

    Returns:
        DataFrame with one row per gene, a fresh 0..k-1 index and every
        ``distance`` set to 0.
    """
    points = [gene for element in values for gene in element]
    offspring = DataFrame({"point": points})
    offspring["distance"] = 0
    return offspring


def uniform_crossover(parents, m):
    """Cross two parents: keep matching genes, split the rest at a cutoff.

    The genes shared by both parents (same point at the same position) are
    kept; the remaining positions are filled from both parents via
    :func:`select_distinct_genes`, and the result is passed through
    :func:`repair_offspring` so it has ``m`` genes.
    """
    matching_indexes = get_matching_genes(parents)[0]
    matching_genes = parents[0]["point"].iloc[matching_indexes]
    # Positions, not gene values, are what select_distinct_genes removes.
    first_genes, second_genes = select_distinct_genes(matching_indexes, parents, m)
    offspring = populate_offspring(values=[matching_genes, first_genes, second_genes])
    return repair_offspring(offspring, parents, m)


def position_crossover(parents, m):
    """Cross two parents: keep matching genes, shuffle the rest of one parent."""
    matching_indexes = get_matching_genes(parents)[0]
    matching_genes = parents[0]["point"].iloc[matching_indexes]
    shuffled_genes = select_random_genes(matching_indexes, parents, m)
    return populate_offspring(values=[matching_genes, shuffled_genes])


def crossover(mode, parents, m):
    """Dispatch to the crossover named by ``mode``.

    ``"uniform"`` selects :func:`uniform_crossover`; any other value selects
    :func:`position_crossover`.
    """
    if mode == "uniform":
        return uniform_crossover(parents, m)
    return position_crossover(parents, m)


def element_in_dataframe(solution, element):
    """Return True when ``element`` appears in the ``point`` column."""
    return bool((solution["point"] == element).any())


def select_new_gene(individual, n):
    """Draw a random point from ``range(n)`` that ``individual`` does not hold.

    Raises:
        ValueError: if ``individual`` already holds every point in
            ``range(n)`` (the rejection sampling below would never end).
    """
    points = individual["point"]
    used_in_range = points[(points >= 0) & (points < n)]
    if used_in_range.nunique() >= n:
        raise ValueError("individual already contains every available point")

    while True:
        new_gene = randint(n)
        if not element_in_dataframe(solution=individual, element=new_gene):
            return new_gene


def mutate(population, n, probability=0.001):
    """Mutate random genes of random individuals in place.

    ``ceil(len(population) * n * probability)`` mutations are applied.  Each
    one picks a random individual and a random row, replaces its point with
    a point the individual does not hold yet and resets its ``distance`` to 0.

    Args:
        population: sequence of solutions; the DataFrames are modified.
        n: exclusive upper bound for point identifiers.
        probability: mutation probability used to size the number of mutations.

    Returns:
        The same ``population`` object.
    """
    expected_mutations = len(population) * n * probability
    for _ in range(ceil(expected_mutations)):
        individual = population[randint(len(population))]
        position = randint(len(individual))
        point_column = individual.columns.get_loc("point")
        distance_column = individual.columns.get_loc("distance")
        # Positional writes: safe whatever the individual's index labels are.
        individual.iloc[position, point_column] = select_new_gene(individual, n)
        individual.iloc[position, distance_column] = 0
    return population


def tournament_selection(solution):
    """Sample two rows of ``solution`` and return the one with higher distance."""
    individuals = solution.sample(n=2)
    best_label = individuals["distance"].astype(float).idxmax()
    return individuals.loc[best_label]


def replace_worst_element(previous, n, data):
    """Return a copy of ``previous`` with its lowest-distance point replaced.

    The replacement is a random point not yet in the solution; only the
    replaced row's ``distance`` is recomputed (other rows keep their values).
    ``previous`` is left untouched.
    """
    solution = previous.copy()
    worst_label = solution["distance"].astype(float).idxmin()
    solution.loc[worst_label, "point"] = select_new_gene(solution, n)
    solution.loc[worst_label, "distance"] = compute_distance(
        element=solution.loc[worst_label, "point"], solution=solution, data=data
    )
    return solution


def get_random_solution(previous, n, data, max_attempts=None):
    """Replace worst elements until the total distance beats ``previous``.

    Each attempt starts from the previous candidate, not from ``previous``.

    Args:
        previous: starting solution (not modified).
        n: exclusive upper bound for point identifiers.
        data: distance table.
        max_attempts: optional cap on the number of replacements.  ``None``
            (default) keeps trying until an improvement is found, which may
            never happen; with a cap, ``previous`` is returned when no
            improvement was found.
    """
    target = previous["distance"].sum()
    candidate = replace_worst_element(previous, n, data)
    attempts = 1
    while candidate["distance"].sum() <= target:
        if max_attempts is not None and attempts >= max_attempts:
            return previous
        candidate = replace_worst_element(previous=candidate, n=n, data=data)
        attempts += 1
    return candidate


def explore_neighbourhood(element, n, data, max_iterations=100000):
    """Improve ``element`` ``max_iterations`` times and return the last result.

    With ``max_iterations`` equal to 0, ``element`` itself is returned.
    """
    current = element
    for _ in range(max_iterations):
        current = get_random_solution(previous=current, n=n, data=data)
    return current


def genetic_algorithm(n, m, data):
    """Generate a random solution and refine it for 100 iterations.

    Only :func:`generate_first_solution` and :func:`explore_neighbourhood`
    are used here; the crossover, mutation and selection helpers of this
    module are not invoked by this function.
    """
    first_solution = generate_first_solution(n, m, data)
    best_solution = explore_neighbourhood(
        element=first_solution, n=n, data=data, max_iterations=100
    )
    return best_solution