MH-P2/src/genetic_algorithm.py

154 lines
5.3 KiB
Python
Raw Normal View History

2021-05-24 18:17:40 +02:00
from numpy import sum, append, arange, delete, where
2021-06-17 19:25:16 +02:00
from numpy.random import randint, choice, shuffle
2021-05-24 18:17:40 +02:00
from pandas import DataFrame
2021-06-17 19:15:50 +02:00
from math import ceil
2021-04-29 12:33:46 +02:00
2021-05-24 18:17:40 +02:00
def get_row_distance(source, destination, data):
    """Look up the distance stored for an unordered pair of points.

    Args:
        source: Identifier of one endpoint of the pair.
        destination: Identifier of the other endpoint.
        data: DataFrame queried on its ``source``, ``destination`` and
            ``distance`` columns.

    Returns:
        The ``distance`` value of the first row that matches the pair in
        either orientation.

    Raises:
        IndexError: If no row matches the pair.
    """
    # The pair may be stored as (source, destination) or the other way round.
    pair_filter = (
        "(source == @source and destination == @destination) or "
        "(source == @destination and destination == @source)"
    )
    matching_rows = data.query(pair_filter)
    return matching_rows["distance"].values[0]
def compute_distance(element, individual, data):
    """Add up the distances from one point to every other point of an individual.

    Args:
        element: Point identifier whose accumulated distance is computed.
        individual: DataFrame with a ``point`` column.
        data: Distance table forwarded to ``get_row_distance``.

    Returns:
        The sum of the distances between ``element`` and each row of
        ``individual`` whose ``point`` differs from it; 0 when there is none.
    """
    other_points = individual.query(f"point != {element}")
    total = 0
    for _, row in other_points.iterrows():
        pair_distance = get_row_distance(
            source=element, destination=row.point, data=data
        )
        total += pair_distance
    return total
def generate_individual(n, m, data):
    """Create a random individual with its per-point accumulated distances.

    Args:
        n: Population passed to ``numpy.random.choice`` to draw points from.
        m: Number of distinct points to draw.
        data: Distance table forwarded to ``compute_distance``.

    Returns:
        A DataFrame with ``point``, ``distance`` and ``fitness`` columns.
        ``point`` holds ``m`` values drawn without replacement, ``distance``
        the result of ``compute_distance`` for each point, and ``fitness`` is
        left unset.
    """
    chosen_points = choice(n, size=m, replace=False)
    individual = DataFrame(columns=["point", "distance", "fitness"])
    individual["point"] = chosen_points

    def distance_to_rest(point):
        # Evaluated against the individual being built, so each point is
        # measured against the other selected points.
        return compute_distance(point, individual=individual, data=data)

    individual["distance"] = individual["point"].apply(distance_to_rest)
    return individual
2021-05-10 19:25:06 +02:00
def evaluate_individual(individual, data):
    """Score an individual from the distances between its points.

    For every point except the last one, the largest distance among the
    remaining rows that touch that point is recorded, and those rows are then
    discarded before the next point is processed.

    Args:
        individual: DataFrame with a ``point`` column.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns.

    Returns:
        The sum of the recorded per-point maxima.
    """
    # NOTE: the name ``genotype`` is referenced from the query string below.
    genotype = individual.point.values
    # Keep only the rows whose two endpoints both belong to the individual.
    distances = data.query("source in @genotype and destination in @genotype")
    per_point_maxima = []
    for point in genotype[:-1]:
        touching = distances.query(f"source == {point} or destination == {point}")
        per_point_maxima.append(touching["distance"].astype(float).max())
        # Rows touching this point must not be considered again.
        distances = distances.query(f"source != {point} and destination != {point}")
    return sum(per_point_maxima)
2021-04-29 12:33:46 +02:00
2021-05-25 16:53:59 +02:00
def select_distinct_genes(matching_genes, parents, m):
    """Split the non-matching gene positions between the two parents.

    Args:
        matching_genes: Positions removed from ``arange(m)`` before the split.
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.
        m: Number of gene positions; also the exclusive upper bound of the
            random cutoff.

    Returns:
        A tuple ``(first_parent_genes, second_parent_genes)``: the first
        parent's points at the remaining positions from the cutoff onwards,
        and the second parent's points at the remaining positions before it.
    """
    split_at = randint(m)
    remaining_positions = delete(arange(m), matching_genes)
    before_cutoff = remaining_positions[:split_at]
    from_cutoff = remaining_positions[split_at:]
    first_parent, second_parent = parents[0], parents[1]
    return first_parent.point.iloc[from_cutoff], second_parent.point.iloc[before_cutoff]
2021-05-25 16:53:59 +02:00
def select_random_genes(matching_genes, parents, m):
    """Return one random parent's non-matching genes in random order.

    Args:
        matching_genes: Positions removed from ``arange(m)`` before selecting.
        parents: Sequence of DataFrames with a ``point`` column.
        m: Number of gene positions.

    Returns:
        An array with the chosen parent's points at the remaining positions,
        shuffled in place before being returned.
    """
    donor = parents[randint(len(parents))]
    kept_positions = delete(arange(m), matching_genes)
    shuffled = donor.point.iloc[kept_positions].values
    shuffle(shuffled)
    return shuffled
2021-05-24 18:17:40 +02:00
def repair_offspring(offspring, parents, m):
    """Trim or extend an offspring until it holds exactly ``m`` rows.

    While the offspring is too long, the row with the largest ``distance`` is
    dropped. While it is too short, the point with the largest ``distance`` of
    a randomly chosen parent is appended with a distance of 0.

    Args:
        offspring: DataFrame with ``point`` and ``distance`` columns. Rows are
            dropped from it in place when it is too long.
        parents: Sequence of DataFrames with ``point`` and ``distance``
            columns. A row donated to the offspring is dropped from the
            donating parent in place.
        m: Required number of rows.

    Returns:
        The repaired offspring. It is a new DataFrame (with a reset index)
        whenever at least one row had to be appended.
    """
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    from pandas import concat

    while len(offspring) != m:
        if len(offspring) > m:
            surplus_index = offspring["distance"].astype(float).idxmax()
            offspring.drop(index=surplus_index, inplace=True)
        else:
            random_parent = parents[randint(len(parents))]
            best_index = random_parent["distance"].astype(float).idxmax()
            best_point = random_parent["point"].loc[best_index]
            new_row = DataFrame([{"point": best_point, "distance": 0}])
            offspring = concat([offspring, new_row], ignore_index=True)
            # Remove the donated gene so the same parent offers a different
            # point the next time it is picked.
            # NOTE(review): this mutates the caller's parent - confirm that
            # is intended.
            random_parent.drop(index=best_index, inplace=True)
    return offspring
def get_matching_genes(parents):
    """Find the positions at which the first two parents hold the same point.

    Args:
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.

    Returns:
        The result of ``numpy.where`` on the element-wise comparison of both
        ``point`` columns: a tuple holding one array of matching positions.
    """
    first_points, second_points = parents[0].point, parents[1].point
    same_point = first_points == second_points
    return where(same_point)
2021-05-25 16:53:59 +02:00
def populate_offspring(values):
    """Stack several groups of genes into a single offspring DataFrame.

    Args:
        values: Iterable of gene groups; each one is assigned to the ``point``
            column of an intermediate DataFrame.

    Returns:
        A DataFrame with ``point``, ``distance`` and ``fitness`` columns that
        holds the stacked groups minus the first stacked row, with
        ``distance`` and ``fitness`` set to 0. Index labels of the groups are
        preserved.
    """
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    from pandas import concat

    columns = ["point", "distance", "fitness"]
    # Start from an empty frame so an empty ``values`` still yields a frame.
    frames = [DataFrame(columns=columns)]
    for element in values:
        aux = DataFrame(columns=columns)
        aux["point"] = element
        frames.append(aux)
    offspring = concat(frames)
    offspring["distance"] = 0
    offspring["fitness"] = 0
    # NOTE(review): the first stacked row is discarded, as in the original
    # code; callers appear to rely on this - confirm before changing it.
    offspring = offspring[1:]
    return offspring
def uniform_crossover(parents, m):
    """Build one offspring from the genes both parents share plus a split of the rest.

    Args:
        parents: Sequence whose first two items are DataFrames with ``point``
            and ``distance`` columns.
        m: Number of genes of an individual.

    Returns:
        The offspring returned by ``repair_offspring``, holding ``m`` rows.
    """
    matching_indexes = get_matching_genes(parents)
    matching_genes = parents[0].point.iloc[matching_indexes]
    # select_distinct_genes removes its first argument from arange(m), so it
    # needs the matching *positions*; passing the gene values would delete
    # the wrong positions or raise IndexError for values >= m.
    first_genes, second_genes = select_distinct_genes(matching_indexes, parents, m)
    offspring = populate_offspring(values=[matching_genes, first_genes, second_genes])
    viable_offspring = repair_offspring(offspring, parents, m)
    return viable_offspring
2021-05-25 16:53:59 +02:00
def position_crossover(parents, m):
    """Build two offspring, each with its own random ordering of the non-matching genes.

    Args:
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.
        m: Number of genes of an individual.

    Returns:
        A list with two offspring DataFrames produced by
        ``populate_offspring``.
    """
    matching_genes = get_matching_genes(parents)
    # NOTE(review): ``matching_genes`` is the index tuple returned by
    # get_matching_genes, not the gene values (uniform_crossover converts
    # with ``.iloc`` first) - confirm what populate_offspring should receive.
    offspring = []
    for _ in range(2):
        # Draw a fresh shuffle per offspring; reusing a single shuffle made
        # both offspring identical.
        shuffled_genes = select_random_genes(matching_genes, parents, m)
        offspring.append(populate_offspring(values=[matching_genes, shuffled_genes]))
    return offspring
2021-05-24 18:17:40 +02:00
def crossover(mode, parents, m):
    """Dispatch to the crossover operator selected by ``mode``.

    Args:
        mode: ``"uniform"`` selects ``uniform_crossover``; any other value
            selects ``position_crossover``.
        parents: Parents forwarded to the selected operator.
        m: Number of genes of an individual, forwarded as well.

    Returns:
        Whatever the selected operator returns: a single offspring for the
        uniform operator, a list of two offspring for the position operator.
    """
    operator = uniform_crossover if mode == "uniform" else position_crossover
    return operator(parents, m)
def element_in_dataframe(individual, element):
    """Tell whether a point is already part of an individual.

    Args:
        individual: DataFrame with a ``point`` column.
        element: Point identifier, interpolated into the query expression.

    Returns:
        True if at least one row has ``point`` equal to ``element``, False
        otherwise.
    """
    matches = individual.query(f"point == {element}")
    if matches.empty:
        return False
    return True
2021-06-17 19:15:50 +02:00
def select_new_gene(individual, n):
    """Pick a random point in ``[0, n)`` that the individual does not contain.

    The unused points are enumerated and one is drawn from them, so the call
    always terminates. Drawing random points until an unused one shows up
    never returns when every point is already in use.

    Args:
        individual: DataFrame with a ``point`` column.
        n: Exclusive upper bound of the point identifiers.

    Returns:
        An ``int`` drawn uniformly from the points in ``[0, n)`` that are
        absent from ``individual``.

    Raises:
        ValueError: If every point in ``[0, n)`` is already in the individual.
    """
    used_points = set(individual["point"].tolist())
    candidates = [gene for gene in range(n) if gene not in used_points]
    if not candidates:
        raise ValueError("the individual already contains every available point")
    return int(choice(candidates))
def mutate(population, n, probability=0.001):
    """Replace randomly chosen genes of randomly chosen individuals in place.

    Args:
        population: Sequence of individuals, each a DataFrame with ``point``
            and ``distance`` columns.
        n: Exclusive upper bound of the point identifiers a gene can take; it
            also scales the number of mutations.
        probability: Mutation rate;
            ``ceil(len(population) * n * probability)`` mutations are applied.

    Returns:
        The same ``population`` object; the mutated individuals are modified
        in place.
    """
    expected_mutations = len(population) * n * probability
    for _ in range(ceil(expected_mutations)):
        # The individual is drawn from the population size; drawing from
        # ``n`` (the number of points) indexes past the end of the
        # population whenever n > len(population).
        individual = population[randint(len(population))]
        # Pick the gene by position so it stays valid with any index labels.
        gene_position = randint(len(individual))
        point_column = individual.columns.get_loc("point")
        distance_column = individual.columns.get_loc("distance")
        # Single-step assignment: chained ``df[col].iloc[i] = x`` may write
        # to a temporary copy and leave the individual untouched.
        individual.iloc[gene_position, point_column] = select_new_gene(individual, n)
        # The stored distance belonged to the replaced point.
        individual.iloc[gene_position, distance_column] = 0
    return population
2021-05-31 18:12:23 +02:00
def tournament_selection(population):
    """Sample two rows and return the one with the larger ``distance``.

    Args:
        population: DataFrame with a ``distance`` column and at least two
            rows.

    Returns:
        The sampled row (a Series) with the larger ``distance``.
    """
    individuals = population.sample(n=2)
    # Compare only the two contestants, and address the winner by position
    # within the sample. An index computed on the whole population does not
    # refer to a row of the 2-row sample.
    contestant_distances = individuals["distance"].astype(float).reset_index(drop=True)
    best_position = contestant_distances.idxmax()
    return individuals.iloc[best_position]
2021-04-29 12:33:46 +02:00
2021-05-10 19:25:06 +02:00
def genetic_algorithm(n, m, data):
2021-05-24 18:17:40 +02:00
first_solution = generate_first_solution(n, m, data)