2021-06-19 20:22:39 +02:00
|
|
|
from numpy import sum, append, arange, delete, intersect1d
|
2021-06-17 19:25:16 +02:00
|
|
|
from numpy.random import randint, choice, shuffle
|
2021-05-24 18:17:40 +02:00
|
|
|
from pandas import DataFrame
|
2021-06-17 19:15:50 +02:00
|
|
|
from math import ceil
|
2021-06-18 18:54:34 +02:00
|
|
|
from functools import partial
|
|
|
|
from multiprocessing import Pool
|
|
|
|
|
|
|
|
from preprocessing import parse_file
|
2021-04-29 12:33:46 +02:00
|
|
|
|
|
|
|
|
2021-05-24 18:17:40 +02:00
|
|
|
def get_row_distance(source, destination, data):
    """Return the distance stored in ``data`` between two points.

    The pair is looked up in both orientations, so the order in which
    ``source`` and ``destination`` are given does not matter.

    Args:
        source: Identifier of one endpoint.
        destination: Identifier of the other endpoint.
        data: DataFrame queried on ``source`` and ``destination`` and read
            from its ``distance`` column.

    Returns:
        The ``distance`` value of the first matching row.

    Raises:
        IndexError: If no row links the two points.
    """
    row = data.query(
        "(source == @source and destination == @destination) or "
        "(source == @destination and destination == @source)"
    )
    if row.empty:
        # Same exception type as the bare ``.values[0]`` lookup would raise,
        # but with a message that names the offending pair.
        raise IndexError(
            f"no distance recorded between {source} and {destination}"
        )
    return row["distance"].values[0]
|
|
|
|
|
|
|
|
|
2021-06-17 22:44:39 +02:00
|
|
|
def compute_distance(element, individual, data):
    """Add up the distances from ``element`` to every other point.

    Args:
        element: Point whose total distance is wanted.
        individual: DataFrame with a ``point`` column; rows whose point
            equals ``element`` are skipped.
        data: Distance table forwarded to ``get_row_distance``.

    Returns:
        The accumulated distance (``0`` when there are no other points).
    """
    others = individual.query(f"point != {element}")
    distance_from_element = partial(get_row_distance, source=element, data=data)

    total = 0
    for _, row in others.iterrows():
        total += distance_from_element(destination=row.point)
    return total
|
|
|
|
|
|
|
|
|
2021-06-17 22:44:39 +02:00
|
|
|
def generate_individual(n, m, data):
    """Build a random individual of ``m`` distinct points drawn from ``range(n)``.

    Args:
        n: Upper bound (exclusive) of the point identifiers to draw from.
        m: Number of points in the individual.
        data: Distance table forwarded to ``compute_distance``.

    Returns:
        A DataFrame with ``point``, ``distance`` and ``fitness`` columns;
        ``distance`` holds each point's summed distance to the others and
        ``fitness`` is left unset.
    """
    columns = ["point", "distance", "fitness"]
    individual = DataFrame(columns=columns)
    individual["point"] = choice(n, size=m, replace=False)

    distance_to_rest = partial(compute_distance, individual=individual, data=data)
    individual["distance"] = individual["point"].apply(distance_to_rest)
    return individual
|
2021-05-10 19:25:06 +02:00
|
|
|
|
|
|
|
|
2021-06-17 22:44:39 +02:00
|
|
|
def evaluate_individual(individual, data):
    """Compute the fitness of ``individual`` and store it in its ``fitness`` column.

    For every point except the last, the largest distance among the
    remaining rows that touch that point is recorded, then all rows touching
    the point are discarded. The fitness is the sum of those maxima.

    Args:
        individual: DataFrame with a ``point`` column; modified in place.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns.

    Returns:
        The same ``individual`` object with ``fitness`` filled in.
    """
    genotype = individual.point.values
    remaining = data.query("source in @genotype and destination in @genotype")

    maxima = []
    for gene in genotype[:-1]:
        touching = remaining.query(f"source == {gene} or destination == {gene}")
        maxima.append(touching["distance"].astype(float).max())
        # Drop the processed point so its pairs are not counted again.
        remaining = remaining.query(f"source != {gene} and destination != {gene}")

    individual["fitness"] = sum(maxima)
    return individual
|
2021-04-29 12:33:46 +02:00
|
|
|
|
|
|
|
|
2021-05-25 16:53:59 +02:00
|
|
|
def select_distinct_genes(matching_genes, parents, m):
    """Split the non-shared genes of two parents around a random cutoff.

    Args:
        matching_genes: Points present in both parents; excluded from the
            result.
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.
        m: Number of genes per individual.

    Returns:
        A tuple ``(first_parent_genes, second_parent_genes)``: the first
        parent's non-shared points from the cutoff onwards and the second
        parent's non-shared points before the cutoff.
    """
    first_parent = parents[0].query("point not in @matching_genes")
    second_parent = parents[1].query("point not in @matching_genes")

    # ``randint(0)`` raises ValueError, which happens whenever both parents
    # carry exactly the same genes; there is nothing to split in that case.
    free_slots = m - len(matching_genes)
    cutoff = randint(free_slots) if free_slots > 0 else 0

    first_parent_genes = first_parent.point.values[cutoff:]
    second_parent_genes = second_parent.point.values[:cutoff]
    return first_parent_genes, second_parent_genes
|
|
|
|
|
|
|
|
|
2021-05-25 16:53:59 +02:00
|
|
|
def select_random_genes(matching_genes, parents, m):
    """Return the non-shared genes of a randomly chosen parent, shuffled.

    Args:
        matching_genes: Point values present in both parents.
        parents: Sequence of DataFrames with a ``point`` column.
        m: Number of genes per individual. Kept for interface
            compatibility; the selection no longer needs it.

    Returns:
        A shuffled array with the chosen parent's points that are not in
        ``matching_genes``.
    """
    random_parent = parents[randint(len(parents))]
    points = random_parent.point

    # ``matching_genes`` holds point *values*, so filter by value. Using them
    # as positions to delete picks the wrong rows or runs out of bounds.
    genes = points[~points.isin(matching_genes)].values.copy()
    shuffle(genes)
    return genes
|
|
|
|
|
|
|
|
|
2021-06-20 04:54:58 +02:00
|
|
|
def select_random_parent(parents):
    """Pick one of two parents at random, avoiding one with no points left.

    Args:
        parents: Pair of DataFrames with a ``point`` column.

    Returns:
        The randomly drawn parent, or the other one when the drawn parent's
        ``point`` column is empty.
    """
    chosen = randint(len(parents))
    if not parents[chosen].point.empty:
        return parents[chosen]
    # Fall back to the opposite parent of the pair.
    return parents[1 - chosen]
|
|
|
|
|
|
|
|
|
2021-05-24 18:17:40 +02:00
|
|
|
def _pop_best_unused_point(offspring, parents):
    """Remove and return a parent's highest-distance point not yet in ``offspring``."""
    while True:
        donor = select_random_parent(parents)
        best_index = donor["distance"].idxmax()
        best_point = donor["point"].loc[best_index]
        # Consume the candidate so the next draw cannot return it again.
        donor.drop(index=best_index, inplace=True)
        if best_point not in offspring.point.values:
            return best_point


def repair_offspring(offspring, parents, m):
    """Trim or extend ``offspring`` until it has exactly ``m`` rows.

    Surplus rows are removed one at a time, always the row with the largest
    ``distance``. Missing rows are filled with the highest-distance point of
    a randomly chosen parent that the offspring does not contain yet; the
    new row gets ``distance`` and ``fitness`` set to 0.

    Args:
        offspring: DataFrame with ``point``, ``distance`` and ``fitness``
            columns.
        parents: Pair of parent DataFrames. Rows taken from a parent are
            dropped from it in place.
        m: Required number of rows.

    Returns:
        A DataFrame with ``m`` rows and a fresh 0..m-1 index.
    """
    # Local import: ``DataFrame.append`` no longer exists in pandas 2.x.
    from pandas import concat

    # Work on unique labels; with duplicated labels a single ``drop`` would
    # remove every row sharing the label instead of just one.
    offspring = offspring.reset_index(drop=True)

    while len(offspring) > m:
        offspring = offspring.drop(index=offspring["distance"].idxmax())

    while len(offspring) < m:
        best_point = _pop_best_unused_point(offspring, parents)
        new_row = DataFrame([{"point": best_point, "distance": 0, "fitness": 0}])
        offspring = concat([offspring, new_row], ignore_index=True)

    return offspring.reset_index(drop=True)
|
|
|
|
|
|
|
|
|
|
|
|
def get_matching_genes(parents):
    """Return the points that appear in both parents.

    Args:
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.

    Returns:
        The array produced by ``intersect1d`` over the two ``point`` columns.
    """
    first, second = parents[0], parents[1]
    return intersect1d(first.point.values, second.point.values)
|
2021-05-24 18:17:40 +02:00
|
|
|
|
|
|
|
|
2021-05-25 16:53:59 +02:00
|
|
|
def populate_offspring(values):
    """Build an offspring DataFrame from several groups of genes.

    Args:
        values: Iterable of gene collections (for example arrays of points);
            they are concatenated in the order given.

    Returns:
        A DataFrame with ``point``, ``distance`` and ``fitness`` columns, one
        row per gene, a 0..k-1 index, and ``distance`` and ``fitness`` set to
        0.
    """
    # Every supplied gene is kept. The seed frame used to be empty, so
    # slicing off "the first row" afterwards threw away a real gene.
    points = [gene for group in values for gene in group]

    offspring = DataFrame({"point": points})
    offspring["distance"] = 0
    offspring["fitness"] = 0
    return offspring
|
|
|
|
|
|
|
|
|
|
|
|
def uniform_crossover(parents, m):
    """Produce one offspring from a pair of parents.

    The offspring is assembled from the genes both parents share plus a
    split of their remaining genes, and is then repaired to ``m`` genes.

    Args:
        parents: Pair of parent DataFrames.
        m: Number of genes per individual.

    Returns:
        The repaired offspring DataFrame.
    """
    shared = get_matching_genes(parents)
    from_first, from_second = select_distinct_genes(shared, parents, m)
    child = populate_offspring(values=[shared, from_first, from_second])
    return repair_offspring(child, parents, m)
|
|
|
|
|
|
|
|
|
2021-05-25 16:53:59 +02:00
|
|
|
def position_crossover(parents, m):
    """Produce two offspring from a pair of parents.

    Both offspring are built from the genes the parents share followed by
    one shuffled selection of the remaining genes.

    Args:
        parents: Pair of parent DataFrames.
        m: Number of genes per individual.

    Returns:
        A list with two offspring DataFrames.
    """
    shared = get_matching_genes(parents)
    shuffled = select_random_genes(shared, parents, m)
    return [populate_offspring(values=[shared, shuffled]) for _ in range(2)]
|
2021-05-24 18:17:40 +02:00
|
|
|
|
|
|
|
|
|
|
|
def crossover(mode, parents, m):
    """Recombine consecutive pairs of parents into a flat list of offspring.

    Args:
        mode: ``"uniform"`` selects ``uniform_crossover``; any other value
            selects ``position_crossover``.
        parents: Sequence of parent DataFrames, consumed two at a time. A
            trailing unpaired parent is ignored.
        m: Number of genes per individual.

    Returns:
        A list of offspring DataFrames.
    """
    if mode == "uniform":
        crossover_func = partial(uniform_crossover, m=m)
    else:
        crossover_func = partial(position_crossover, m=m)

    offspring = []
    # zip over the same iterator twice yields (p0, p1), (p2, p3), ...
    for pair in zip(*[iter(parents)] * 2):
        children = crossover_func(pair)
        # position_crossover returns a list of two children while
        # uniform_crossover returns a single DataFrame; flatten so callers
        # always receive a list of individuals.
        if isinstance(children, list):
            offspring.extend(children)
        else:
            offspring.append(children)
    return offspring
|
2021-05-24 18:17:40 +02:00
|
|
|
|
|
|
|
|
2021-06-17 22:44:39 +02:00
|
|
|
def element_in_dataframe(individual, element):
    """Tell whether ``element`` is one of the points of ``individual``.

    Args:
        individual: DataFrame with a ``point`` column.
        element: Point value to look for.

    Returns:
        ``True`` when at least one row has ``point == element``.
    """
    matches = individual.query(f"point == {element}")
    return len(matches.index) > 0
|
|
|
|
|
|
|
|
|
2021-06-17 19:15:50 +02:00
|
|
|
def select_new_gene(individual, n):
    """Draw a random point in ``range(n)`` that ``individual`` does not contain.

    Args:
        individual: DataFrame with a ``point`` column.
        n: Upper bound (exclusive) for the drawn point.

    Returns:
        The first randomly drawn point that is not already present.
    """
    candidate = randint(n)
    # Keep redrawing until the candidate is new to the individual.
    while element_in_dataframe(individual=individual, element=candidate):
        candidate = randint(n)
    return candidate
|
|
|
|
|
|
|
|
|
2021-06-20 04:54:58 +02:00
|
|
|
def mutate(offspring, data, probability=0.001):
    """Replace randomly chosen genes of randomly chosen offspring.

    The number of mutations is ``ceil(len(offspring) * n * probability)``.
    Each mutation swaps one gene for a point the individual does not contain
    yet and recomputes that gene's ``distance``.

    Note:
        ``n`` is not a parameter; it is read from module scope, so it must be
        defined there before this function is called.

    Args:
        offspring: List of individual DataFrames; mutated in place.
        data: Distance table forwarded to ``compute_distance``.
        probability: Per-gene mutation probability.

    Returns:
        The same ``offspring`` list.
    """
    expected_mutations = len(offspring) * n * probability

    for _ in range(ceil(expected_mutations)):
        individual = offspring[randint(len(offspring))]

        # Address the gene by position. Index labels are not guaranteed to
        # match positions, and chained ``frame[col].iloc[...] = value`` may
        # write to a temporary copy instead of the frame.
        row = randint(len(individual))
        point_col = individual.columns.get_loc("point")
        distance_col = individual.columns.get_loc("distance")

        new_point = select_new_gene(individual, n)
        individual.iloc[row, point_col] = new_point
        individual.iloc[row, distance_col] = compute_distance(
            element=new_point,
            individual=individual,
            data=data,
        )
    return offspring
|
|
|
|
|
|
|
|
|
|
|
|
def get_individual_index(population, element):
    """Find the position of the first individual whose fitness equals ``element``'s.

    Only the first entry of each ``fitness`` column is compared.

    Args:
        population: List of individual DataFrames.
        element: Individual DataFrame to look for.

    Returns:
        The index of the first match, or ``None`` when there is none.
    """
    return next(
        (
            position
            for position, candidate in enumerate(population)
            if candidate.fitness.values[0] == element.fitness.values[0]
        ),
        None,
    )
|
2021-05-31 18:12:23 +02:00
|
|
|
|
|
|
|
|
2021-06-20 04:54:58 +02:00
|
|
|
def tournament_selection(population):
    """Run a two-way tournament and return the winner with its position.

    Two individuals are drawn at random (with replacement); the one with the
    higher first ``fitness`` entry wins, the first drawn winning ties.

    Args:
        population: List of individual DataFrames.

    Returns:
        A tuple ``(winner, index)`` where ``index`` is the value returned by
        ``get_individual_index`` for the winner.
    """
    first = population[randint(len(population))]
    second = population[randint(len(population))]

    first_fitness = first.fitness.values[0]
    second_fitness = second.fitness.values[0]
    winner = second if second_fitness > first_fitness else first

    return winner, get_individual_index(population, winner)
|
2021-04-29 12:33:46 +02:00
|
|
|
|
|
|
|
|
2021-06-17 22:45:59 +02:00
|
|
|
def generational_replacement(previous_population, current_population):
    """Replace the population with the offspring, keeping the previous best.

    If the fittest individual of ``previous_population`` is not present in
    ``current_population``, it overwrites the least fit member of
    ``current_population``.

    Args:
        previous_population: List of evaluated individual DataFrames.
        current_population: List of individual DataFrames; modified in place.

    Returns:
        ``current_population`` (the same list object).
    """
    new_population = current_population
    if not previous_population or not new_population:
        # Nothing to preserve, or nothing that could be replaced.
        return new_population

    # Rank by the fitness value itself; ``all(x.fitness)`` only yields a
    # boolean and cannot order individuals.
    best_previous_individual = max(
        previous_population, key=lambda x: x.fitness.values[0]
    )

    # ``in`` and ``list.index`` compare DataFrames with ``==``, which raises
    # on truth-testing, so membership is checked explicitly.
    already_present = any(
        member is best_previous_individual
        or member.equals(best_previous_individual)
        for member in new_population
    )
    if not already_present:
        worst_index = min(
            range(len(new_population)),
            key=lambda i: new_population[i].fitness.values[0],
        )
        new_population[worst_index] = best_previous_individual
    return new_population
|
|
|
|
|
|
|
|
|
2021-06-17 23:03:03 +02:00
|
|
|
def get_best_elements(population):
    """Return the positions of the two fittest individuals.

    Individuals are ranked by the first entry of their ``fitness`` column.
    ``population`` is not modified, and both indexes refer to positions in
    the list as passed in.

    Args:
        population: List of individual DataFrames (at least two).

    Returns:
        A tuple ``(first_index, second_index)`` for the best and second-best
        individual; ties keep list order.

    Raises:
        ValueError: If ``population`` has fewer than two individuals.
    """
    if len(population) < 2:
        raise ValueError("population must contain at least two individuals")

    ranked = sorted(
        range(len(population)),
        key=lambda i: population[i].fitness.values[0],
        reverse=True,
    )
    return ranked[0], ranked[1]
|
|
|
|
|
|
|
|
|
|
|
|
def get_worst_elements(population):
    """Return the positions of the two least fit individuals.

    Individuals are ranked by the first entry of their ``fitness`` column.
    ``population`` is not modified, and both indexes refer to positions in
    the list as passed in.

    Args:
        population: List of individual DataFrames (at least two).

    Returns:
        A tuple ``(first_index, second_index)`` for the worst and
        second-worst individual; ties keep list order.

    Raises:
        ValueError: If ``population`` has fewer than two individuals.
    """
    if len(population) < 2:
        raise ValueError("population must contain at least two individuals")

    ranked = sorted(
        range(len(population)),
        key=lambda i: population[i].fitness.values[0],
    )
    return ranked[0], ranked[1]
|
|
|
|
|
|
|
|
|
|
|
|
def stationary_replacement(prev_population, current_population):
    """Let the two best offspring challenge the two worst previous members.

    The best offspring is compared with the worst previous individual and
    the second best with the second worst; an offspring takes the slot only
    when its fitness is strictly higher.

    Args:
        prev_population: List of individual DataFrames; modified in place.
        current_population: List of offspring DataFrames.

    Returns:
        ``prev_population`` (the same list object).
    """
    new_population = prev_population
    worst_indexes = get_worst_elements(prev_population)
    best_indexes = get_best_elements(current_population)

    for worst, best in zip(worst_indexes, best_indexes):
        challenger = current_population[best]
        incumbent = prev_population[worst]
        # Compare scalar fitness values; comparing the ``fitness`` Series
        # directly yields a Series whose truth value is ambiguous.
        if challenger.fitness.values[0] > incumbent.fitness.values[0]:
            new_population[worst] = challenger
    return new_population
|
|
|
|
|
|
|
|
|
2021-06-17 23:03:03 +02:00
|
|
|
def replace_population(prev_population, current_population, mode):
    """Dispatch to the replacement strategy named by ``mode``.

    Args:
        prev_population: Population before this generation.
        current_population: Offspring of this generation.
        mode: ``"generational"`` for generational replacement; any other
            value selects stationary replacement.

    Returns:
        Whatever the selected strategy returns.
    """
    strategy = (
        generational_replacement
        if mode == "generational"
        else stationary_replacement
    )
    return strategy(prev_population, current_population)
|
2021-06-17 22:45:59 +02:00
|
|
|
|
|
|
|
|
2021-06-18 18:54:34 +02:00
|
|
|
def evaluate_population(population, data, cores=4):
    """Evaluate every individual in parallel worker processes.

    Args:
        population: Iterable of individual DataFrames.
        data: Distance table passed to ``evaluate_individual``.
        cores: Number of worker processes in the pool.

    Returns:
        The list of results from ``evaluate_individual``, in input order.
    """
    with Pool(cores) as workers:
        return workers.map(partial(evaluate_individual, data=data), population)
|
|
|
|
|
|
|
|
|
2021-06-20 04:54:58 +02:00
|
|
|
def select_parents(population, n, mode):
    """Select parents by repeated tournament without replacement.

    Args:
        population: List of individual DataFrames. It is not modified;
            selection works on a shallow copy.
        n: Number of parents to select in ``"generational"`` mode.
        mode: ``"generational"`` selects ``n`` parents; any other value
            selects 2.

    Returns:
        The list of selected parents. It is shorter than requested when the
        population has fewer individuals than were asked for.
    """
    # Copy the list so removing winners does not drain the caller's
    # population.
    candidates = list(population)
    wanted = n if mode == "generational" else 2

    parents = []
    # Never run a tournament on an empty pool.
    for _ in range(min(wanted, len(candidates))):
        element, index = tournament_selection(population=candidates)
        parents.append(element)
        candidates.pop(index)
    return parents
|
|
|
|
|
|
|
|
|
2021-06-20 04:54:58 +02:00
|
|
|
def genetic_algorithm(n, m, data, select_mode, crossover_mode, max_iterations=100000):
    """Run the genetic algorithm and return the fittest individual found.

    Args:
        n: Population size; also the exclusive upper bound of the point
            identifiers used when generating individuals.
        m: Number of genes per individual.
        data: Distance table used for distance and fitness computations.
        select_mode: Selection/replacement mode, forwarded to
            ``select_parents`` and ``replace_population``.
        crossover_mode: Crossover mode, forwarded to ``crossover``.
        max_iterations: Number of generations to run.

    Returns:
        The individual DataFrame with the highest first ``fitness`` entry in
        the final population.
    """
    population = [generate_individual(n, m, data) for _ in range(n)]
    population = evaluate_population(population, data)

    for _ in range(max_iterations):
        parents = select_parents(population, n, select_mode)
        offspring = crossover(crossover_mode, parents, m)
        offspring = mutate(offspring, data)
        # Offspring leave crossover with fitness 0, so they must be scored
        # before replacement compares them with the previous population.
        offspring = evaluate_population(offspring, data)
        population = replace_population(population, offspring, select_mode)

    # Return the individual itself; a list index would be meaningless to the
    # caller because ``population`` never leaves this function.
    return max(population, key=lambda individual: individual.fitness.values[0])
|
2021-06-18 18:54:34 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Parsed at import time because ``mutate`` reads ``n`` from module scope.
n, m, data = parse_file("data/GKD-c_11_n500_m50.txt")

# The run starts worker processes through ``multiprocessing.Pool``. Guard it
# so that importing this module (which spawned workers do) does not launch
# another run.
if __name__ == "__main__":
    genetic_algorithm(
        n=10,
        m=4,
        data=data,
        select_mode="generational",
        crossover_mode="uniform",
        max_iterations=1,
    )
|