# MH-P2/src/genetic_algorithm.py

from copy import deepcopy
from functools import partial
from math import ceil
from multiprocessing import Pool

from numpy import sum, append, intersect1d
from numpy.random import randint, choice, shuffle
from pandas import DataFrame, concat

from preprocessing import parse_file


def get_row_distance(source, destination, data):
    """Return the distance recorded in ``data`` between two points.

    The pair is looked up in both orientations, so the order of ``source``
    and ``destination`` does not matter.

    Args:
        source: One end of the pair.
        destination: The other end of the pair.
        data: Frame with ``source``, ``destination`` and ``distance`` columns.

    Returns:
        The ``distance`` value of the first matching row.

    Raises:
        IndexError: If no row matches the pair.
    """
    either_orientation = (
        "(source == @source and destination == @destination) or "
        "(source == @destination and destination == @source)"
    )
    matches = data.query(either_orientation)
    return matches["distance"].values[0]
def compute_distance(element, individual, data):
    """Add up the distances from ``element`` to every other point of an individual.

    Args:
        element: Point whose contribution is measured.
        individual: Frame with a ``point`` column.
        data: Distance table handed to ``get_row_distance``.

    Returns:
        The accumulated distance (``0`` when there is no other point).
    """
    others = individual.query(f"point != {element}")
    total = 0
    for other_point in others["point"]:
        total += get_row_distance(
            source=element, destination=other_point, data=data
        )
    return total
def generate_individual(n, m, data):
    """Create a random individual of ``m`` distinct points drawn from ``range(n)``.

    Args:
        n: Exclusive upper bound of the point identifiers.
        m: Number of points to draw (without replacement).
        data: Distance table used to fill the ``distance`` column.

    Returns:
        A frame with ``point``, ``distance`` and ``fitness`` columns; only
        ``point`` and ``distance`` are filled in here.
    """
    layout = ["point", "distance", "fitness"]
    candidate = DataFrame(columns=layout)
    candidate["point"] = choice(n, size=m, replace=False)
    # Each point's distance is measured against the other points of this
    # same individual.
    contributions = candidate["point"].apply(
        func=compute_distance, individual=candidate, data=data
    )
    candidate["distance"] = contributions
    return candidate
def evaluate_individual(individual, data):
    """Compute an individual's fitness and store it in its ``fitness`` column.

    For every point except the last, the largest remaining pairwise distance
    touching that point is recorded, after which all pairs involving the
    point are discarded. The fitness is the sum of those maxima and is
    written to every row.

    Args:
        individual: Frame with a ``point`` column; modified in place.
        data: Frame with ``source``, ``destination`` and ``distance`` columns.

    Returns:
        The same ``individual`` object.
    """
    genotype = individual.point.values
    # Keep only the pairs whose two ends both belong to this individual.
    distances = data.query("source in @genotype and destination in @genotype")
    maxima = []
    for item in genotype[:-1]:
        touching = distances.query(f"source == {item} or destination == {item}")
        maxima.append(touching["distance"].astype(float).max())
        distances = distances.query(f"source != {item} and destination != {item}")
    individual["fitness"] = sum(maxima)
    return individual
def select_distinct_genes(matching_genes, parents, m):
    """Split the genes the two parents do not share at a random cutoff.

    The first parent contributes its non-shared genes from the cutoff
    onwards, the second parent contributes its non-shared genes before the
    cutoff.

    Args:
        matching_genes: Points present in both parents.
        parents: Pair of frames with a ``point`` column.
        m: Target number of genes per individual.

    Returns:
        Tuple ``(first_parent_genes, second_parent_genes)`` of arrays. Both
        are empty when the shared genes already fill the individual.
    """
    first_parent = parents[0].query("point not in @matching_genes")
    second_parent = parents[1].query("point not in @matching_genes")
    first_pool = first_parent.point.values
    second_pool = second_parent.point.values

    distinct_count = m - len(matching_genes)
    if distinct_count <= 0:
        # randint(0) raises ValueError; identical parents leave nothing to
        # split, so neither parent contributes extra genes.
        return first_pool[:0], second_pool[:0]

    cutoff = randint(distinct_count)
    return first_pool[cutoff:], second_pool[:cutoff]
def select_shuffled_genes(matching_genes, parents):
    """Return each parent's non-shared genes in random order.

    Args:
        matching_genes: Points present in both parents.
        parents: Pair of frames with a ``point`` column.

    Returns:
        Tuple ``(first_genes, second_genes)`` of independently shuffled
        arrays.
    """
    first_parent = parents[0].query("point not in @matching_genes")
    second_parent = parents[1].query("point not in @matching_genes")
    # Shuffle private copies: ``.values`` can be a read-only view when pandas
    # Copy-on-Write is active, and an in-place shuffle would fail on it.
    first_genes = first_parent.point.values.copy()
    second_genes = second_parent.point.values.copy()
    shuffle(first_genes)
    shuffle(second_genes)
    return first_genes, second_genes
def select_random_parent(parents):
    """Pick one of the parents at random, avoiding one without points.

    Args:
        parents: Sequence of frames with a ``point`` column. The fallback
            index is computed as ``1 - index``.

    Returns:
        The randomly chosen parent, or the parent at the opposite index when
        the chosen one has an empty ``point`` column.
    """
    picked = randint(len(parents))
    candidate = parents[picked]
    if not candidate.point.empty:
        return candidate
    return parents[1 - picked]
def get_best_point(parents, offspring):
    """Pick a parent point, not yet in the offspring, with the largest distance.

    A parent is chosen at random among those that still hold a point missing
    from ``offspring``; that parent's unused point with the highest
    ``distance`` value is returned.

    Args:
        parents: Frames with ``point`` and ``distance`` columns.
        offspring: Frame with a ``point`` column.

    Returns:
        The selected point.

    Raises:
        ValueError: If every parent point is already in the offspring.
    """
    used_points = offspring.point.values
    candidates = []
    for parent in parents:
        unused = parent[~parent["point"].isin(used_points)]
        if not unused.empty:
            candidates.append(unused)
    if not candidates:
        # Retrying a fresh copy of the same parents could never terminate in
        # this situation, so fail loudly instead.
        raise ValueError("every parent point is already part of the offspring")

    chosen = candidates[randint(len(candidates))]
    # Work positionally: index labels may repeat or be non-contiguous, so a
    # label returned by idxmax() is not a valid iloc position.
    scores = chosen["distance"].astype(float).fillna(float("-inf")).values
    return chosen["point"].iloc[scores.argmax()]
def repair_offspring(offspring, parents, m):
    """Trim or extend an offspring until it holds exactly ``m`` rows.

    While there are too many rows, the row with the largest ``distance`` is
    removed. While there are too few, a point supplied by ``get_best_point``
    is appended with ``distance`` and ``fitness`` set to 0.

    Args:
        offspring: Frame with ``point``, ``distance`` and ``fitness`` columns.
            It is not modified; a repaired copy is returned.
        parents: Parent frames used as the source of extra points.
        m: Required number of rows.

    Returns:
        A frame with ``m`` rows and a fresh 0..m-1 index.
    """
    # Start from unique labels so that dropping one label removes one row.
    repaired = offspring.reset_index(drop=True)

    while len(repaired) > m:
        surplus_label = repaired["distance"].astype(float).idxmax()
        repaired = repaired.drop(index=surplus_label).reset_index(drop=True)

    while len(repaired) < m:
        best_point = get_best_point(parents, repaired)
        new_row = DataFrame(
            {"point": [best_point], "distance": [0], "fitness": [0]}
        )
        # DataFrame.append no longer exists in pandas 2.x; concat replaces it.
        repaired = concat([repaired, new_row], ignore_index=True)

    return repaired
def get_matching_genes(parents):
    """Return the points that appear in both of the first two parents.

    Args:
        parents: Sequence whose first two items are frames with a ``point``
            column.

    Returns:
        Array with the intersection of both ``point`` columns, as produced
        by ``numpy.intersect1d``.
    """
    first_points, second_points = (parents[k].point.values for k in (0, 1))
    return intersect1d(first_points, second_points)
def populate_offspring(values):
    """Build an offspring frame from several groups of points.

    Args:
        values: Iterable of point collections; they are concatenated in
            order.

    Returns:
        A frame with ``point``, ``distance`` and ``fitness`` columns, one row
        per point, ``distance`` and ``fitness`` set to 0, and a unique
        0..k-1 index.
    """
    # Building the frame in one go avoids DataFrame.append (removed in
    # pandas 2.x) and the repeated index labels that row-wise appending left
    # behind.
    points = [gene for genes in values for gene in genes]
    zeros = [0] * len(points)
    return DataFrame(
        {"point": points, "distance": zeros, "fitness": list(zeros)},
        columns=["point", "distance", "fitness"],
    )
def uniform_crossover(parents, m):
    """Produce one repaired offspring from a pair of parents.

    The offspring keeps the genes both parents share, adds a random split of
    the remaining genes of each parent, and is then repaired to ``m`` rows.

    Args:
        parents: Pair of parent frames.
        m: Required number of genes in the offspring.

    Returns:
        The frame returned by ``repair_offspring``.
    """
    shared = get_matching_genes(parents)
    from_first, from_second = select_distinct_genes(shared, parents, m)
    raw_child = populate_offspring(values=[shared, from_first, from_second])
    return repair_offspring(raw_child, parents, m)
def position_crossover(parents):
    """Produce two offspring from a pair of parents.

    Both offspring start with the genes the parents share; the first is
    completed with the shuffled remaining genes of the first parent, the
    second with those of the second parent.

    Args:
        parents: Pair of parent frames.

    Returns:
        Tuple ``(first_offspring, second_offspring)``.
    """
    shared = get_matching_genes(parents)
    genes_a, genes_b = select_shuffled_genes(shared, parents)
    child_a = populate_offspring(values=[shared, genes_a])
    child_b = populate_offspring(values=[shared, genes_b])
    return child_a, child_b
def crossover(mode, parents, m):
    """Cross consecutive pairs of parents and collect their offspring.

    Parents are paired in order (1st with 2nd, 3rd with 4th, ...); a trailing
    unpaired parent is ignored. Every pair yields two offspring.

    Args:
        mode: ``"uniform"`` selects ``uniform_crossover`` (called twice per
            pair); any other value selects ``position_crossover``.
        parents: Iterable of parent frames.
        m: Required number of genes, forwarded to ``uniform_crossover``.

    Returns:
        List of offspring frames.
    """
    # Pulling twice from the same iterator yields non-overlapping pairs.
    parent_iter = iter(parents)
    couples = list(zip(parent_iter, parent_iter))
    use_uniform = mode == "uniform"

    children = []
    for couple in couples:
        if use_uniform:
            children.extend(uniform_crossover(couple, m) for _ in range(2))
        else:
            children.extend(position_crossover(couple))
    return children
def element_in_dataframe(individual, element):
    """Tell whether ``element`` already appears in an individual's points.

    Args:
        individual: Frame with a ``point`` column.
        element: Point to look for.

    Returns:
        ``True`` when at least one row has ``point == element``.
    """
    return not individual.query(f"point == {element}").empty
def select_new_gene(individual, n):
    """Draw a random point from ``range(n)`` that the individual lacks.

    Args:
        individual: Frame with a ``point`` column.
        n: Exclusive upper bound of the candidate points.

    Returns:
        A point that is not present in ``individual``. Keeps drawing until
        one is found.
    """
    candidate = randint(n)
    while element_in_dataframe(individual=individual, element=candidate):
        candidate = randint(n)
    return candidate
def mutate(offspring, data, probability=0.001, n=None):
    """Replace randomly chosen genes of randomly chosen offspring.

    The number of mutations is ``ceil(len(offspring) * n * probability)``.
    Each mutation picks one individual and one of its rows, swaps the row's
    point for one the individual does not contain, and recomputes that row's
    ``distance``. The ``distance`` of the other rows is left as it was.

    Args:
        offspring: List of frames with ``point`` and ``distance`` columns;
            the frames are modified in place.
        data: Frame with ``source``, ``destination`` and ``distance`` columns.
        probability: Per-gene mutation probability.
        n: Exclusive upper bound of the point identifiers. When omitted it is
            inferred from ``data`` (assumes identifiers run from 0 to n-1,
            to be confirmed against the data loader).

    Returns:
        The same ``offspring`` list.
    """
    if not offspring:
        return offspring
    if n is None:
        # The original read an undeclared module-level ``n``; take the
        # problem size from the distance table instead.
        n = int(max(data["source"].max(), data["destination"].max())) + 1

    expected_mutations = len(offspring) * n * probability
    for _ in range(ceil(expected_mutations)):
        individual = offspring[randint(len(offspring))]
        if individual.empty:
            continue
        # Choose the row by position so repeated or sparse labels are safe.
        position = randint(len(individual))
        point_column = individual.columns.get_loc("point")
        distance_column = individual.columns.get_loc("distance")

        new_gene = select_new_gene(individual, n)
        # Single-step .iloc assignment; chained ``["col"].iloc[...] = x`` may
        # write to a temporary and leave the frame untouched.
        individual.iloc[position, point_column] = new_gene
        # Distances are floats; make sure the column can hold them.
        individual["distance"] = individual["distance"].astype(float)
        individual.iloc[position, distance_column] = compute_distance(
            element=new_gene, individual=individual, data=data
        )
    return offspring
def get_individual_index(element, population):
    """Find the first individual whose fitness equals that of ``element``.

    Args:
        element: Frame with a ``fitness`` column.
        population: Sequence of frames with a ``fitness`` column.

    Returns:
        The position of the first individual with the same first-row fitness,
        or ``None`` when there is none.
    """
    hits = (
        position
        for position, candidate in enumerate(population)
        if candidate.fitness.values[0] == element.fitness.values[0]
    )
    return next(hits, None)
def tournament_selection(population):
    """Run a two-way tournament and return its winner.

    Two individuals are drawn at random (the same one may be drawn twice)
    and the one with the higher first-row fitness wins; the first draw wins
    ties.

    Args:
        population: Sequence of frames with a ``fitness`` column.

    Returns:
        Tuple ``(winner, index)`` where ``index`` is what
        ``get_individual_index`` reports for the winner in ``population``.
    """
    first_draw = population[randint(len(population))]
    second_draw = population[randint(len(population))]
    if second_draw.fitness.values[0] > first_draw.fitness.values[0]:
        winner = second_draw
    else:
        winner = first_draw
    return winner, get_individual_index(winner, population)
def check_element_population(element, population):
    """Tell whether an individual with the same set of points is in a population.

    Args:
        element: Frame with a ``point`` column.
        population: Iterable of frames with a ``point`` column.

    Returns:
        ``True`` when some individual holds exactly the same points as
        ``element``, regardless of their order.
    """
    # Compare the actual genes; all(a) == all(b) only compared truthiness.
    target = sorted(element.point.values)
    return any(sorted(item.point.values) == target for item in population)


def generational_replacement(prev_population, current_population):
    """Replace the whole population while keeping the previous best (elitism).

    The new population is the current one. If the best individual of the
    previous population is not already part of it, that individual takes
    the place of the current worst one.

    Args:
        prev_population: List of frames with ``point`` and ``fitness`` columns.
        current_population: List of frames with ``point`` and ``fitness``
            columns; it is not modified.

    Returns:
        A new list holding the resulting population.
    """
    new_population = list(current_population)
    if not prev_population or not new_population:
        return new_population

    best_previous_individual = max(
        prev_population, key=lambda x: x.fitness.values[0]
    )
    # Re-insert the elite only when it was lost; the check used to be
    # inverted.
    if not check_element_population(best_previous_individual, new_population):
        worst_index = min(
            range(len(new_population)),
            key=lambda index: new_population[index].fitness.values[0],
        )
        new_population[worst_index] = best_previous_individual
    return new_population
def get_best_elements(population):
    """Return the positions of the two fittest individuals.

    Args:
        population: Sequence of frames with a ``fitness`` column.

    Returns:
        Tuple ``(first_index, second_index)``: positions in ``population`` of
        the highest and second-highest first-row fitness. Earlier
        individuals win ties.

    Raises:
        ValueError: If the population holds fewer than two individuals.
    """
    if len(population) < 2:
        raise ValueError("population must hold at least two individuals")
    # Rank positions of the original list. Looking the runner-up up in a
    # shortened copy returned an index shifted by one.
    ranking = sorted(
        range(len(population)),
        key=lambda index: population[index].fitness.values[0],
        reverse=True,
    )
    return ranking[0], ranking[1]
def get_worst_elements(population):
    """Return the positions of the two least fit individuals.

    Args:
        population: Sequence of frames with a ``fitness`` column.

    Returns:
        Tuple ``(first_index, second_index)``: positions in ``population`` of
        the lowest and second-lowest first-row fitness. Earlier individuals
        win ties.

    Raises:
        ValueError: If the population holds fewer than two individuals.
    """
    if len(population) < 2:
        raise ValueError("population must hold at least two individuals")
    # Rank positions of the original list. Looking the runner-up up in a
    # shortened copy returned an index shifted by one.
    ranking = sorted(
        range(len(population)),
        key=lambda index: population[index].fitness.values[0],
    )
    return ranking[0], ranking[1]
def stationary_replacement(prev_population, current_population):
    """Let the two best new individuals challenge the two worst old ones.

    The best current individual is compared with the worst previous one, and
    the second best with the second worst; a challenger takes the slot only
    when its fitness is strictly higher.

    Args:
        prev_population: List of frames with a ``fitness`` column; updated in
            place.
        current_population: List of frames with a ``fitness`` column.

    Returns:
        ``prev_population`` itself, after the replacements.
    """
    worst_a, worst_b = get_worst_elements(prev_population)
    best_a, best_b = get_best_elements(current_population)
    survivors = prev_population
    for worst, best in ((worst_a, best_a), (worst_b, best_b)):
        challenger_fitness = current_population[best].fitness.values[0]
        defender_fitness = prev_population[worst].fitness.values[0]
        if challenger_fitness > defender_fitness:
            survivors[worst] = current_population[best]
    return survivors
def replace_population(prev_population, current_population, mode):
    """Dispatch to the replacement scheme selected by ``mode``.

    Args:
        prev_population: Population before this generation.
        current_population: Newly produced individuals.
        mode: ``"generational"`` uses ``generational_replacement``; any other
            value uses ``stationary_replacement``.

    Returns:
        Whatever the selected replacement function returns.
    """
    if mode == "generational":
        scheme = generational_replacement
    else:
        scheme = stationary_replacement
    return scheme(prev_population, current_population)
def evaluate_population(population, data, cores=4):
    """Evaluate every individual using a pool of worker processes.

    Args:
        population: Iterable of individuals to evaluate.
        data: Distance table forwarded to ``evaluate_individual``.
        cores: Number of worker processes in the pool.

    Returns:
        List with the result of ``evaluate_individual`` for each individual,
        in the same order.
    """
    with Pool(cores) as workers:
        return workers.map(partial(evaluate_individual, data=data), population)
def select_parents(population, n, mode):
    """Choose parents by repeated tournaments without replacement.

    Works on a deep copy of the population; after each tournament the index
    reported for the winner is removed from that copy.

    Args:
        population: List of individuals.
        n: Number of parents to select in generational mode.
        mode: ``"generational"`` selects ``n`` parents; any other value
            selects 2.

    Returns:
        List of the selected parents (copies, not the original objects).
    """
    candidates = deepcopy(population)
    rounds = n if mode == "generational" else 2
    chosen = []
    for _ in range(rounds):
        winner, winner_index = tournament_selection(population=candidates)
        chosen.append(winner)
        candidates.pop(winner_index)
    return chosen
def genetic_algorithm(n, m, data, select_mode, crossover_mode, max_iterations=100000):
    """Run the genetic algorithm and return the fittest individual found.

    Args:
        n: Exclusive upper bound of the point identifiers; also used as the
            population size and as the number of parents in generational
            mode.
        m: Number of points per individual.
        data: Distance table with ``source``, ``destination`` and
            ``distance`` columns.
        select_mode: ``"generational"`` or anything else for the stationary
            scheme; used for both parent selection and replacement.
        crossover_mode: ``"uniform"`` or anything else for position
            crossover.
        max_iterations: Number of generations to run.

    Returns:
        The individual with the highest first-row fitness in the final
        population.
    """
    population = [generate_individual(n, m, data) for _ in range(n)]
    population = evaluate_population(population, data)
    for _ in range(max_iterations):
        parents = select_parents(population, n, select_mode)
        offspring = crossover(crossover_mode, parents, m)
        # NOTE(review): mutate() is not given this function's ``n``; confirm
        # which problem size it is meant to use.
        offspring = mutate(offspring, data)
        # Offspring are created with a placeholder fitness of 0, so they
        # must be evaluated before replacement compares fitness values.
        offspring = evaluate_population(offspring, data)
        population = replace_population(population, offspring, select_mode)
    return max(population, key=lambda individual: individual.fitness.values[0])
# Guarding the entry point keeps worker processes started by
# multiprocessing from re-running the experiment when they import this
# module (required with the "spawn" start method).
if __name__ == "__main__":
    # The names stay at module level on purpose: other code in this file
    # may read them as globals.
    n, m, data = parse_file("data/GKD-c_11_n500_m50.txt")

    genetic_algorithm(
        n=10,
        m=4,
        data=data,
        select_mode="generational",
        crossover_mode="uniform",
        max_iterations=10,
    )