MH-P2/src/genetic_algorithm.py

from numpy import sum, append, arange, delete, where
from numpy.random import randint, choice, shuffle
from pandas import DataFrame
from math import ceil


def get_row_distance(source, destination, data):
    """Look up the distance recorded between two points.

    The pair is matched in either orientation, so the caller does not need
    to know which of the two points was stored in the ``source`` column.

    Args:
        source: Identifier of one end of the pair.
        destination: Identifier of the other end of the pair.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns.

    Returns:
        The ``distance`` value of the first row linking both points.

    Raises:
        IndexError: If no row links the two points.
    """
    forward = (data["source"] == source) & (data["destination"] == destination)
    backward = (data["source"] == destination) & (data["destination"] == source)
    matches = data.loc[forward | backward, "distance"]
    return matches.values[0]


def compute_distance(element, individual, data):
    """Add up the distances from one point to the other points of an individual.

    Args:
        element: Point whose contribution is being measured.
        individual: DataFrame with a ``point`` column; rows whose point equals
            ``element`` are ignored.
        data: Distance table forwarded to ``get_row_distance``.

    Returns:
        The accumulated distance, or ``0`` when there is no other point.
    """
    other_points = individual.loc[individual["point"] != element, "point"]
    total = 0
    for other in other_points:
        total += get_row_distance(source=element, destination=other, data=data)
    return total


def generate_individual(n, m, data):
    """Build a random individual made of ``m`` distinct points.

    Args:
        n: Upper bound (exclusive) of the point identifiers to draw from.
        m: Number of points in the individual.
        data: Distance table forwarded to ``compute_distance``.

    Returns:
        A DataFrame with ``point``, ``distance`` and ``fitness`` columns.
        ``point`` holds the sampled identifiers and ``distance`` the value
        ``compute_distance`` returns for each of them; ``fitness`` is created
        but not filled in here.
    """
    candidate = DataFrame(columns=["point", "distance", "fitness"])
    # Sampling without replacement keeps every point unique.
    candidate["point"] = choice(n, size=m, replace=False)
    points = candidate["point"]
    candidate["distance"] = points.apply(
        lambda point: compute_distance(point, individual=candidate, data=data)
    )
    return candidate


def evaluate_individual(individual, data):
    """Score an individual from the distances between its own points.

    Only rows of ``data`` whose two ends both belong to the individual are
    considered. The points are visited in order (the last one is skipped);
    for each, the largest distance among the rows still touching it is
    recorded and those rows are then discarded, so no pair is used twice.

    Args:
        individual: DataFrame with a ``point`` column.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns.

    Returns:
        The sum of the recorded maxima, computed with the module-level
        ``sum`` (imported from numpy in this file).
    """
    genotype = individual.point.values
    both_inside = data["source"].isin(genotype) & data["destination"].isin(genotype)
    remaining = data.loc[both_inside]
    maxima = []
    for gene in genotype[:-1]:
        touches_gene = (remaining["source"] == gene) | (
            remaining["destination"] == gene
        )
        maxima.append(remaining.loc[touches_gene, "distance"].astype(float).max())
        # Rows already accounted for must not be seen by later points.
        remaining = remaining.loc[~touches_gene]
    return sum(maxima)


def select_distinct_genes(matching_genes, parents, m):
    """Split the non-matching positions between the two parents.

    A random cutoff in ``[0, m)`` is drawn. Positions not listed in
    ``matching_genes`` that fall before the cutoff are read from the second
    parent, the rest from the first parent.

    Args:
        matching_genes: Positions to exclude; passed to ``numpy.delete`` as
            the indices to remove from ``arange(m)``.
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.
        m: Number of genes per individual.

    Returns:
        A ``(first_parent_genes, second_parent_genes)`` tuple of Series.
    """
    split_at = randint(m)
    free_positions = delete(arange(m), matching_genes)
    from_second, from_first = free_positions[:split_at], free_positions[split_at:]
    first_parent, second_parent = parents[0], parents[1]
    return first_parent.point.iloc[from_first], second_parent.point.iloc[from_second]


def select_random_genes(matching_genes, parents, m):
    """Return the non-matching genes of a random parent in shuffled order.

    Args:
        matching_genes: Positions to exclude; passed to ``numpy.delete`` as
            the indices to remove from ``arange(m)``.
        parents: Sequence of DataFrames with a ``point`` column.
        m: Number of genes per individual.

    Returns:
        A freshly allocated numpy array holding the selected parent's genes
        at the remaining positions, shuffled. The parent is left untouched.
    """
    donor = parents[randint(len(parents))]
    free_positions = delete(arange(m), matching_genes)
    # Shuffle an explicit copy: with pandas Copy-on-Write enabled ``.values``
    # is read-only, so shuffling it in place would raise.
    genes = donor.point.iloc[free_positions].values.copy()
    shuffle(genes)
    return genes


def repair_offspring(offspring, parents, m):
    """Return a version of ``offspring`` that has exactly ``m`` rows.

    While there are too many rows, the row with the largest ``distance`` is
    dropped. While there are too few, a randomly chosen parent donates its
    not-yet-offered point with the largest ``distance``; points the
    offspring already contains are skipped so no point is repeated.

    Neither ``offspring`` nor the parents are modified: the work is done on
    copies, so the same parents can safely be reused afterwards.

    Args:
        offspring: DataFrame with ``point`` and ``distance`` columns.
        parents: Sequence of DataFrames with ``point`` and ``distance``
            columns.
        m: Required number of rows.

    Returns:
        A DataFrame with ``m`` rows. Added rows have ``distance`` 0 and no
        value in the remaining columns; whenever a row is added the index is
        renumbered from 0.

    Raises:
        ValueError: If the parents run out of points before ``m`` is reached.
    """
    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    from pandas import concat

    repaired = offspring.copy()
    # Private, uniquely indexed candidate pools: consuming a candidate must
    # not delete the row from the real parent.
    pools = [parent.reset_index(drop=True) for parent in parents]
    while len(repaired) != m:
        if len(repaired) > m:
            surplus_label = repaired["distance"].astype(float).idxmax()
            repaired = repaired.drop(index=surplus_label)
            continue
        if all(pool.empty for pool in pools):
            raise ValueError(
                "parents cannot supply enough distinct points to repair offspring"
            )
        chosen = randint(len(pools))
        pool = pools[chosen]
        if pool.empty:
            continue
        best_label = pool["distance"].astype(float).idxmax()
        best_point = pool.at[best_label, "point"]
        pools[chosen] = pool.drop(index=best_label)
        if (repaired["point"] == best_point).any():
            # Already part of the offspring: offering it again would
            # duplicate a point.
            continue
        new_row = DataFrame([{"point": best_point, "distance": 0}])
        repaired = concat([repaired, new_row], ignore_index=True)
    return repaired


def get_matching_genes(parents):
    """Find the positions at which both parents carry the same point.

    The comparison is positional: the underlying arrays are compared rather
    than the Series, because comparing two Series raises when their index
    labels differ even though callers use the result as positions.

    Args:
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column of equal length.

    Returns:
        The tuple produced by ``numpy.where``; its single element is the
        array of matching positions.
    """
    first_points = parents[0].point.values
    second_points = parents[1].point.values
    return where(first_points == second_points)


def populate_offspring(values):
    """Assemble an offspring frame from several groups of genes.

    The groups are stacked in the given order into the ``point`` column,
    keeping whatever index each group carries; ``distance`` and ``fitness``
    are set to 0 for every row. The first stacked row is then dropped.

    Args:
        values: Iterable of gene groups (Series, arrays or lists).

    Returns:
        A DataFrame with ``point``, ``distance`` and ``fitness`` columns.
    """
    # DataFrame.append was removed in pandas 2.0; a single concat replaces
    # the repeated appends.
    from pandas import concat

    columns = ["point", "distance", "fitness"]
    pieces = [DataFrame({"point": element}) for element in values]
    pieces = [piece for piece in pieces if not piece.empty]
    if pieces:
        offspring = concat(pieces).reindex(columns=columns)
    else:
        offspring = DataFrame(columns=columns)
    offspring["distance"] = 0
    offspring["fitness"] = 0
    # NOTE(review): the leading row is discarded, as in the original code;
    # this removes a real gene rather than a placeholder -- confirm intent.
    return offspring[1:]


def uniform_crossover(parents, m):
    """Create one offspring from two parents.

    Genes found at the same position in both parents are kept; the remaining
    positions are split between the parents by ``select_distinct_genes`` and
    the result is resized to ``m`` rows by ``repair_offspring``.

    Args:
        parents: Sequence whose first two items are DataFrames with
            ``point`` and ``distance`` columns.
        m: Number of genes per individual.

    Returns:
        The repaired offspring DataFrame.
    """
    # ``get_matching_genes`` returns the ``numpy.where`` tuple; its first
    # element is the array of matching positions.
    matching_positions = get_matching_genes(parents)[0]
    matching_genes = parents[0].point.iloc[matching_positions]
    # Positions, not gene values, must be excluded: ``select_distinct_genes``
    # removes its first argument from ``arange(m)``.
    first_genes, second_genes = select_distinct_genes(
        matching_positions, parents, m
    )
    offspring = populate_offspring(values=[matching_genes, first_genes, second_genes])
    return repair_offspring(offspring, parents, m)


def position_crossover(parents, m):
    """Create two offspring from two parents.

    Genes found at the same position in both parents are kept in each
    offspring; the remaining genes come from ``select_random_genes``, which
    is called once per offspring so that each one gets its own random draw.

    Args:
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.
        m: Number of genes per individual.

    Returns:
        A list with the two offspring DataFrames.
    """
    # ``get_matching_genes`` returns the ``numpy.where`` tuple; its first
    # element is the array of matching positions.
    matching_positions = get_matching_genes(parents)[0]
    # The offspring must hold the gene values found at those positions, not
    # the positions themselves.
    matching_genes = parents[0].point.iloc[matching_positions]
    offspring = []
    for _ in range(2):
        shuffled_genes = select_random_genes(matching_positions, parents, m)
        offspring.append(populate_offspring(values=[matching_genes, shuffled_genes]))
    return offspring


def crossover(mode, parents, m):
    """Dispatch to the crossover operator selected by ``mode``.

    Args:
        mode: ``"uniform"`` selects ``uniform_crossover``; any other value
            selects ``position_crossover``.
        parents: Parents forwarded to the operator.
        m: Number of genes per individual, forwarded to the operator.

    Returns:
        Whatever the selected operator returns.
    """
    operator = uniform_crossover if mode == "uniform" else position_crossover
    return operator(parents, m)


def element_in_dataframe(individual, element):
    """Tell whether ``element`` already appears in an individual.

    Args:
        individual: DataFrame with a ``point`` column.
        element: Point identifier to look for.

    Returns:
        ``True`` if at least one row has ``point`` equal to ``element``,
        ``False`` otherwise.
    """
    is_present = (individual["point"] == element).any()
    return bool(is_present)


def select_new_gene(individual, n):
    """Draw a random point in ``[0, n)`` that the individual does not contain.

    Candidates are drawn with ``randint`` until one is free. The points
    already in use are collected once up front, and the case where no free
    point exists is rejected immediately instead of looping forever.

    Args:
        individual: DataFrame with a ``point`` column.
        n: Upper bound (exclusive) of the point identifiers.

    Returns:
        A point identifier not present in ``individual``.

    Raises:
        ValueError: If every value in ``range(n)`` is already present.
    """
    taken = set(individual["point"].tolist())
    if all(gene in taken for gene in range(n)):
        raise ValueError("individual already contains every available point")
    while True:
        candidate = randint(n)
        if candidate not in taken:
            return candidate


def mutate(population, n, probability=0.001):
    """Replace randomly chosen genes with points their individual lacks.

    For each mutation a random individual and a random row of it are picked;
    the row's ``point`` becomes a value from ``select_new_gene`` and its
    ``distance`` is reset to 0.

    Args:
        population: Sequence of individuals (DataFrames with ``point`` and
            ``distance`` columns). The individuals are modified in place.
        n: Upper bound (exclusive) of the point identifiers.
        probability: Mutation probability used to size the number of
            mutations.

    Returns:
        The same ``population`` object.
    """
    # NOTE(review): the count scales with n (number of points) rather than
    # with the number of genes per individual -- confirm this is intended.
    expected_mutations = len(population) * n * probability
    for _ in range(ceil(expected_mutations)):
        # The individual is drawn from the population itself; drawing from
        # range(n) could index past the end of the population.
        individual = population[randint(len(population))]
        position = randint(len(individual))
        new_gene = select_new_gene(individual, n)
        # Single positional assignments on the frame: chained
        # ``frame[col].iloc[...] = value`` is not guaranteed to write back.
        individual.iloc[position, individual.columns.get_loc("point")] = new_gene
        individual.iloc[position, individual.columns.get_loc("distance")] = 0
    return population


def tournament_selection(population):
    """Run a binary tournament and return the winner.

    Two rows are sampled at random and the one with the larger ``distance``
    wins. The comparison is made between the two sampled rows only, and the
    winner is picked by its position inside that sample.

    Args:
        population: DataFrame with a ``distance`` column and at least two
            rows.

    Returns:
        The winning row as a Series.
    """
    contenders = population.sample(n=2)
    winner_position = contenders["distance"].values.argmax()
    return contenders.iloc[winner_position]


def genetic_algorithm(n, m, data):
    """Entry point of the genetic algorithm.

    Only the creation of the initial individual is implemented so far; no
    selection, crossover or mutation step is run from here yet.

    Args:
        n: Upper bound (exclusive) of the point identifiers.
        m: Number of points per individual.
        data: Distance table forwarded to ``generate_individual``.

    Returns:
        The individual produced by ``generate_individual``.
    """
    # ``generate_first_solution`` is not defined in this module; the
    # generator is named ``generate_individual``.
    first_solution = generate_individual(n, m, data)
    return first_solution
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`from numpy import sum, append, arange, delete, where`
Remove deprecated code 2021-06-17 19:25:16 +02:00			`from numpy.random import randint, choice, shuffle`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`from pandas import DataFrame`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`from math import ceil`
Add files from previous lab 2021-04-29 12:33:46 +02:00

Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`def get_row_distance(source, destination, data):`
			`row = data.query(`
			`"""(source == @source and destination == @destination) or \`
			`(source == @destination and destination == @source)"""`
			`)`
			`return row["distance"].values[0]`


Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def compute_distance(element, individual, data):`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`accumulator = 0`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`distinct_elements = individual.query(f"point != {element}")`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`for _, item in distinct_elements.iterrows():`
			`accumulator += get_row_distance(`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`source=element, destination=item.point, data=data`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`)`
			`return accumulator`


Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def generate_individual(n, m, data):`
			`individual = DataFrame(columns=["point", "distance", "fitness"])`
			`individual["point"] = choice(n, size=m, replace=False)`
			`individual["distance"] = individual["point"].apply(`
			`func=compute_distance, individual=individual, data=data`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`)`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`return individual`
Rename algorithms in main module 2021-05-10 19:25:06 +02:00

Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def evaluate_individual(individual, data):`
Implement element evaluation 2021-05-17 20:42:17 +02:00			`fitness = []`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`genotype = individual.point.values`
Refactor genotype obtention into a function 2021-05-17 20:50:26 +02:00			`distances = data.query(f"source in @genotype and destination in @genotype")`
			`for item in genotype[:-1]:`
Implement element evaluation 2021-05-17 20:42:17 +02:00			`element_df = distances.query(f"source == {item} or destination == {item}")`
			`max_distance = element_df["distance"].astype(float).max()`
			`fitness = append(arr=fitness, values=max_distance)`
			`distances = distances.query(f"source != {item} and destination != {item}")`
			`return sum(fitness)`
Add files from previous lab 2021-04-29 12:33:46 +02:00

Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def select_distinct_genes(matching_genes, parents, m):`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`cutoff = randint(m)`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`distinct_indexes = delete(arange(m), matching_genes)`
			`first_parent_genes = parents[0].point.iloc[distinct_indexes[cutoff:]]`
			`second_parent_genes = parents[1].point.iloc[distinct_indexes[:cutoff]]`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`return first_parent_genes, second_parent_genes`


Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def select_random_genes(matching_genes, parents, m):`
			`random_parent = parents[randint(len(parents))]`
			`distinct_indexes = delete(arange(m), matching_genes)`
			`genes = random_parent.point.iloc[distinct_indexes].values`
			`shuffle(genes)`
			`return genes`


Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`def repair_offspring(offspring, parents, m):`
			`while len(offspring) != m:`
			`if len(offspring) > m:`
			`best_index = offspring["distance"].astype(float).idxmax()`
			`offspring.drop(index=best_index, inplace=True)`
			`elif len(offspring) < m:`
			`random_parent = parents[randint(len(parents))]`
			`best_index = random_parent["distance"].astype(float).idxmax()`
			`best_point = random_parent["point"].loc[best_index]`
			`offspring = offspring.append(`
			`{"point": best_point, "distance": 0}, ignore_index=True`
			`)`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`random_parent.drop(index=best_index, inplace=True)`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`return offspring`


			`def get_matching_genes(parents):`
			`first_parent = parents[0].point`
			`second_parent = parents[1].point`
			`return where(first_parent == second_parent)`


Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def populate_offspring(values):`
Add fitness column to individual 2021-06-17 22:45:42 +02:00			`offspring = DataFrame(columns=["point", "distance", "fitness"])`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`for element in values:`
Add fitness column to individual 2021-06-17 22:45:42 +02:00			`aux = DataFrame(columns=["point", "distance", "fitness"])`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`aux["point"] = element`
			`offspring = offspring.append(aux)`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`offspring["distance"] = 0`
Add fitness column to individual 2021-06-17 22:45:42 +02:00			`offspring["fitness"] = 0`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`offspring = offspring[1:]`
			`return offspring`


			`def uniform_crossover(parents, m):`
			`matching_indexes = get_matching_genes(parents)`
			`matching_genes = parents[0].point.iloc[matching_indexes]`
			`first_genes, second_genes = select_distinct_genes(matching_genes, parents, m)`
			`offspring = populate_offspring(values=[matching_genes, first_genes, second_genes])`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`viable_offspring = repair_offspring(offspring, parents, m)`
			`return viable_offspring`


Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def position_crossover(parents, m):`
			`matching_genes = get_matching_genes(parents)`
			`shuffled_genes = select_random_genes(matching_genes, parents, m)`
Return 2 offsprings in the position crossover 2021-06-17 22:45:14 +02:00			`first_offspring = populate_offspring(values=[matching_genes, shuffled_genes])`
			`second_offspring = populate_offspring(values=[matching_genes, shuffled_genes])`
			`return [first_offspring, second_offspring]`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00

			`def crossover(mode, parents, m):`
			`if mode == "uniform":`
			`return uniform_crossover(parents, m)`
			`return position_crossover(parents, m)`


Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def element_in_dataframe(individual, element):`
			`duplicates = individual.query(f"point == {element}")`
Implement binary tournament selection operator 2021-05-31 18:24:20 +02:00			`return not duplicates.empty`


Implement mutation operator 2021-06-17 19:15:50 +02:00			`def select_new_gene(individual, n):`
			`while True:`
			`new_gene = randint(n)`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`if not element_in_dataframe(individual=individual, element=new_gene):`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`return new_gene`


			`def mutate(population, n, probability=0.001):`
			`expected_mutations = len(population) * n * probability`
			`individuals = []`
			`genes = []`
			`for _ in range(ceil(expected_mutations)):`
			`individuals.append(randint(n))`
Remove deprecated code 2021-06-17 19:25:16 +02:00			`current_individual = individuals[-1]`
			`genes.append(population[current_individual].sample().index)`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`for ind, gen in zip(individuals, genes):`
			`individual = population[ind]`
			`individual["point"].iloc[gen] = select_new_gene(individual, n)`
			`individual["distance"].iloc[gen] = 0`
			`return population`
Implement mutation operator 2021-05-31 18:12:23 +02:00

Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def tournament_selection(population):`
			`individuals = population.sample(n=2)`
			`best_index = population["distance"].idxmax()`
Implement binary tournament selection operator 2021-05-31 18:24:20 +02:00			`return individuals.iloc[best_index]`
Add files from previous lab 2021-04-29 12:33:46 +02:00

Rename algorithms in main module 2021-05-10 19:25:06 +02:00			`def genetic_algorithm(n, m, data):`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`first_solution = generate_first_solution(n, m, data)`