MH-P2/src/genetic_algorithm.py

from functools import partial
from math import ceil
from multiprocessing import Pool

from numpy import sum, append, arange, delete, intersect1d
from numpy.random import randint, choice, shuffle
from pandas import DataFrame, concat

from preprocessing import parse_file


def get_row_distance(source, destination, data):
    """Look up the distance recorded between two points.

    The pair is matched in either orientation, so it does not matter which
    endpoint is stored under ``source`` and which under ``destination``.

    Args:
        source: Identifier of one endpoint.
        destination: Identifier of the other endpoint.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns.

    Returns:
        The ``distance`` value of the first matching row.

    Raises:
        IndexError: If no row links the two points.
    """
    forward = (data["source"] == source) & (data["destination"] == destination)
    backward = (data["source"] == destination) & (data["destination"] == source)
    matches = data.loc[forward | backward, "distance"]
    return matches.values[0]


def compute_distance(element, individual, data):
    """Add up the distances from ``element`` to the other points of an individual.

    Args:
        element: Point whose accumulated distance is wanted.
        individual: DataFrame with a ``point`` column; rows whose point equals
            ``element`` are skipped.
        data: Distance table forwarded to ``get_row_distance``.

    Returns:
        The sum of the looked-up distances (0 when there are no other points).
    """
    others = individual[individual["point"] != element]
    total = 0
    # Explicit accumulation: the module-level ``sum`` is numpy's, not the builtin.
    for other in others["point"]:
        total += get_row_distance(source=element, destination=other, data=data)
    return total


def generate_individual(n, m, data):
    """Create a random individual made of ``m`` distinct points.

    Args:
        n: Points are drawn without replacement from ``range(n)``.
        m: Number of points in the individual.
        data: Distance table used to fill the ``distance`` column.

    Returns:
        DataFrame with ``point``, ``distance`` and ``fitness`` columns; the
        ``fitness`` column is left unset here.
    """
    individual = DataFrame(columns=["point", "distance", "fitness"])
    individual["point"] = choice(n, size=m, replace=False)
    # Each point's distance is its accumulated distance to the other points.
    contribution = partial(compute_distance, individual=individual, data=data)
    individual["distance"] = individual["point"].apply(contribution)
    return individual


def evaluate_individual(individual, data):
    """Compute the fitness of an individual and store it in its ``fitness`` column.

    Only the rows of ``data`` whose two endpoints both belong to the individual
    are considered. The points are visited in order (all but the last): for
    each one the largest remaining distance touching it is recorded, and then
    every row touching it is discarded. The fitness is the sum of those maxima.

    Args:
        individual: DataFrame with a ``point`` column; modified in place.
        data: DataFrame with ``source``, ``destination`` and ``distance``
            columns.

    Returns:
        The same ``individual`` object with ``fitness`` set on every row.
    """
    genotype = individual.point.values
    inside = data["source"].isin(genotype) & data["destination"].isin(genotype)
    remaining = data[inside]
    maxima = []
    for gene in genotype[:-1]:
        touches_gene = (remaining["source"] == gene) | (remaining["destination"] == gene)
        maxima.append(remaining.loc[touches_gene, "distance"].astype(float).max())
        # Rows already accounted for must not be counted again for later genes.
        remaining = remaining[~touches_gene]
    # ``sum`` is numpy's sum here (see the module imports); an empty list gives 0.0.
    individual["fitness"] = sum(maxima)
    return individual


def select_distinct_genes(matching_genes, parents, m):
    """Split the non-shared genes of two parents at a random cut point.

    The genes of each parent that are not in ``matching_genes`` are taken in
    order. A cut position is drawn over the first parent's genes; the first
    parent contributes everything from the cut onwards and the second parent
    everything before it.

    Args:
        matching_genes: Points present in both parents.
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.
        m: Unused; kept so the signature matches the other gene selectors.

    Returns:
        Tuple ``(first_parent_genes, second_parent_genes)`` of arrays.
    """
    first_pool = parents[0].loc[~parents[0]["point"].isin(matching_genes), "point"].values
    second_pool = parents[1].loc[~parents[1]["point"].isin(matching_genes), "point"].values
    # randint(0) raises ValueError, which crashed the crossover of two parents
    # sharing every gene; with nothing to split the cut is simply 0.
    cutoff = randint(len(first_pool)) if len(first_pool) else 0
    return first_pool[cutoff:], second_pool[:cutoff]


def select_random_genes(matching_genes, parents, m):
    """Return the non-shared genes of a randomly chosen parent, shuffled.

    ``matching_genes`` holds gene values (points), not row positions, so the
    parent's rows are filtered by value. The previous implementation passed
    those values to ``numpy.delete`` as indexes into ``arange(m)``, which
    raised IndexError for points >= m and removed the wrong genes otherwise.

    Args:
        matching_genes: Points present in both parents.
        parents: Sequence of DataFrames with a ``point`` column.
        m: Unused; kept so the signature matches the other gene selectors.

    Returns:
        Array with the chosen parent's points that are not in
        ``matching_genes``, in random order.
    """
    donor = parents[randint(len(parents))]
    is_distinct = ~donor["point"].isin(matching_genes)
    # Copy before shuffling so the in-place shuffle can never touch the parent.
    genes = donor.loc[is_distinct, "point"].to_numpy(copy=True)
    shuffle(genes)
    return genes


def repair_offspring(offspring, parents, m):
    """Make an offspring feasible by forcing it to contain exactly ``m`` genes.

    While there are too many genes, the row with the largest ``distance`` is
    removed. While there are too few, a parent is picked at random and its
    highest-``distance`` point that the offspring does not yet contain is
    added with ``distance`` and ``fitness`` set to 0.

    Neither ``offspring`` nor the parents are modified: the previous version
    dropped rows from the parent individuals in place, shrinking members of
    the population as a side effect.

    Args:
        offspring: DataFrame with ``point``, ``distance`` and ``fitness``
            columns.
        parents: Sequence of DataFrames with ``point`` and ``distance``
            columns.
        m: Required number of genes.

    Returns:
        A new DataFrame with ``m`` rows and a fresh 0..m-1 index.

    Raises:
        ValueError: If the parents cannot supply enough unused genes.
    """
    # Re-index a copy: duplicated index labels would make a label-based drop
    # remove several rows at once.
    repaired = offspring.reset_index(drop=True)
    while len(repaired) > m:
        repaired = repaired.drop(index=repaired["distance"].idxmax())
    while len(repaired) < m:
        first_choice = randint(len(parents))
        # Start from the randomly chosen parent and fall back to the others
        # when it has no gene left to offer.
        for offset in range(len(parents)):
            parent = parents[(first_choice + offset) % len(parents)]
            unused = ~parent["point"].isin(repaired["point"])
            candidates = parent.loc[unused, ["point", "distance"]].reset_index(drop=True)
            if not candidates.empty:
                break
        else:
            raise ValueError(
                "parents do not contain enough distinct genes to repair the offspring"
            )
        best_point = candidates.at[candidates["distance"].idxmax(), "point"]
        new_gene = DataFrame({"point": [best_point], "distance": [0], "fitness": [0]})
        # DataFrame.append was removed in pandas 2.0; concat is its replacement.
        repaired = concat([repaired, new_gene], ignore_index=True)
    return repaired.reset_index(drop=True)


def get_matching_genes(parents):
    """Return the points that appear in both parents of a pair.

    Args:
        parents: Sequence whose first two items are DataFrames with a
            ``point`` column.

    Returns:
        Array with the intersection of both ``point`` columns, as produced by
        ``numpy.intersect1d``.
    """
    mother, father = parents[0], parents[1]
    return intersect1d(mother["point"].values, father["point"].values)


def populate_offspring(values):
    """Build an offspring DataFrame from several groups of genes.

    Every gene of every group becomes one row, in the order given. The
    previous implementation sliced the result with ``[1:]``, silently
    discarding the first gene, relied on ``DataFrame.append`` (removed in
    pandas 2.0) and left duplicated index labels behind.

    Args:
        values: Iterable of gene collections (lists or arrays of points).

    Returns:
        DataFrame with ``point``, ``distance`` and ``fitness`` columns and a
        0..k-1 index; ``distance`` and ``fitness`` are initialised to 0.
    """
    genes = [gene for group in values for gene in group]
    offspring = DataFrame({"point": genes}, columns=["point", "distance", "fitness"])
    offspring["distance"] = 0
    offspring["fitness"] = 0
    return offspring


def uniform_crossover(parents, m):
    """Cross a pair of parents into a single repaired offspring.

    The offspring keeps the genes shared by both parents, receives the
    non-shared genes chosen by ``select_distinct_genes`` from each parent, and
    is then passed through ``repair_offspring``.

    Args:
        parents: Pair of individuals.
        m: Number of genes, forwarded to the gene selector and the repair step.

    Returns:
        The offspring returned by ``repair_offspring``.
    """
    shared = get_matching_genes(parents)
    from_first, from_second = select_distinct_genes(shared, parents, m)
    child = populate_offspring(values=[shared, from_first, from_second])
    return repair_offspring(child, parents, m)


def position_crossover(parents, m):
    """Cross a pair of parents into two offspring.

    Each offspring keeps the genes shared by both parents and is completed
    with the genes returned by ``select_random_genes``. The genes are drawn
    separately for each child: previously a single draw was reused, so the two
    offspring were always exact duplicates of each other.

    Args:
        parents: Pair of individuals.
        m: Number of genes, forwarded to ``select_random_genes``.

    Returns:
        List with the two offspring DataFrames.
    """
    matching_genes = get_matching_genes(parents)
    offspring = []
    for _ in range(2):
        remaining_genes = select_random_genes(matching_genes, parents, m)
        offspring.append(populate_offspring(values=[matching_genes, remaining_genes]))
    return offspring


def crossover(mode, parents, m):
    """Apply the selected crossover operator to consecutive pairs of parents.

    Args:
        mode: ``"uniform"`` selects ``uniform_crossover``; any other value
            selects ``position_crossover``.
        parents: List of individuals, consumed two at a time.
        m: Number of genes, forwarded to the operator.

    Returns:
        List with one operator result per pair, in pair order.
    """
    operator = uniform_crossover if mode == "uniform" else position_crossover
    pairs = (parents[start : start + 2] for start in range(0, len(parents), 2))
    return [operator(pair, m=m) for pair in pairs]


def element_in_dataframe(individual, element):
    """Tell whether ``element`` is one of the points of ``individual``.

    Args:
        individual: DataFrame with a ``point`` column.
        element: Point to look for.

    Returns:
        True when at least one row has ``point`` equal to ``element``.
    """
    return bool((individual["point"] == element).any())


def select_new_gene(individual, n):
    """Pick, uniformly at random, a point of ``range(n)`` not used by ``individual``.

    The unused points are enumerated up front instead of drawing until an
    unused one turns up; the retry loop never terminated when the individual
    already contained every point.

    Args:
        individual: DataFrame with a ``point`` column.
        n: Candidate points are ``0 .. n-1``.

    Returns:
        An int in ``range(n)`` that is not in ``individual["point"]``.

    Raises:
        ValueError: If every point in ``range(n)`` is already used.
    """
    used = set(individual["point"])
    available = [gene for gene in range(n) if gene not in used]
    if not available:
        raise ValueError("the individual already contains every available gene")
    return available[randint(len(available))]


def mutate(population, n, probability=0.001):
    """Replace randomly chosen genes of the population with unused points.

    The number of mutations is ``ceil(len(population) * n * probability)``.
    All targets are drawn first and the replacements applied afterwards. A
    mutated gene has its ``distance`` reset to 0.

    Fixes over the previous version: the individual is drawn from the
    population's own length (it used ``randint(n)``, which overruns the list
    whenever ``n`` exceeds the population size), the gene is addressed by row
    position (an index *label* from ``sample()`` was being fed to ``iloc``),
    and the write goes through a single ``iloc`` call instead of chained
    indexing, which is not guaranteed to modify the original frame.

    Args:
        population: List of individuals (DataFrames with ``point`` and
            ``distance`` columns); the individuals are modified in place.
        n: Candidate points are ``0 .. n-1``.
            NOTE(review): ``n`` also scales the mutation count; confirm the
            number of genes per individual was not intended there.
        probability: Mutation probability per gene.

    Returns:
        The same ``population`` list.
    """
    expected_mutations = len(population) * n * probability
    planned = []
    for _ in range(ceil(expected_mutations)):
        chosen = randint(len(population))
        planned.append((chosen, randint(len(population[chosen]))))
    for chosen, position in planned:
        individual = population[chosen]
        new_gene = select_new_gene(individual, n)
        individual.iloc[position, individual.columns.get_loc("point")] = new_gene
        individual.iloc[position, individual.columns.get_loc("distance")] = 0
    return population


def tournament_selection(m, population):
    """Run a binary tournament and return the fitter of two random individuals.

    Two individuals are drawn (with replacement) from the whole population and
    the one with the larger fitness wins; on a tie the first one drawn wins.

    The previous version drew contenders with ``randint(m)`` (so it only saw
    the first ``m`` individuals, or overran a smaller population), ranked with
    ``all(x.fitness)`` (a boolean, not the fitness) and then used an index
    into ``population`` to subscript the two-element contender list.

    Args:
        m: Unused; kept for backward compatibility with existing callers.
        population: Non-empty list of individuals whose ``fitness`` column
            holds the individual's fitness on every row.

    Returns:
        The winning individual (the same object stored in ``population``).
    """
    contenders = [population[randint(len(population))] for _ in range(2)]
    return max(contenders, key=lambda individual: individual["fitness"].iloc[0])


def generational_replacement(previous_population, current_population):
    """Replace the population with the new generation, keeping the previous best.

    If the fittest individual of ``previous_population`` is not present in
    ``current_population``, it overwrites the least fit individual of the
    current population.

    The previous version ranked individuals with ``all(x.fitness)`` (a
    boolean) and relied on ``in`` / ``list.index``, which compare DataFrames
    with ``==`` and raise ValueError. Ranking now uses the fitness value and
    membership uses identity or ``DataFrame.equals``.

    Args:
        previous_population: List of individuals of the previous generation.
        current_population: List of individuals of the new generation;
            modified in place when the elite has to be re-inserted.

    Returns:
        ``current_population`` (the same list object).
    """
    if not previous_population or not current_population:
        return current_population

    def fitness_of(individual):
        # The fitness is repeated on every row, so the first row is enough.
        return individual["fitness"].iloc[0]

    elite = max(previous_population, key=fitness_of)
    survived = any(
        candidate is elite or candidate.equals(elite)
        for candidate in current_population
    )
    if not survived:
        worst_index = min(
            range(len(current_population)),
            key=lambda index: fitness_of(current_population[index]),
        )
        current_population[worst_index] = elite
    return current_population


def get_best_elements(population):
    """Return the positions of the two fittest individuals.

    The population is left untouched and both positions refer to the list as
    passed in. The previous version popped the best individual out of the
    caller's list, reported the second position relative to the shortened
    list, and ranked with ``all(x.fitness)`` (a boolean) instead of the
    fitness value.

    Args:
        population: List of individuals whose ``fitness`` column holds the
            individual's fitness on every row.

    Returns:
        Tuple ``(best_index, second_best_index)``; ties keep list order.

    Raises:
        ValueError: If the population has fewer than two individuals.
    """
    if len(population) < 2:
        raise ValueError("at least two individuals are required")
    ranking = sorted(
        range(len(population)),
        key=lambda index: population[index]["fitness"].iloc[0],
        reverse=True,
    )
    return ranking[0], ranking[1]


def get_worst_elements(population):
    """Return the positions of the two least fit individuals.

    The population is left untouched and both positions refer to the list as
    passed in. The previous version popped the worst individual out of the
    caller's list, reported the second position relative to the shortened
    list, and ranked with ``all(x.fitness)`` (a boolean) instead of the
    fitness value.

    Args:
        population: List of individuals whose ``fitness`` column holds the
            individual's fitness on every row.

    Returns:
        Tuple ``(worst_index, second_worst_index)``; ties keep list order.

    Raises:
        ValueError: If the population has fewer than two individuals.
    """
    if len(population) < 2:
        raise ValueError("at least two individuals are required")
    ranking = sorted(
        range(len(population)),
        key=lambda index: population[index]["fitness"].iloc[0],
    )
    return ranking[0], ranking[1]


def stationary_replacement(prev_population, current_population):
    """Let the best new individuals replace the worst previous ones if fitter.

    The worst and second-worst individuals of ``prev_population`` are paired,
    in that order, with the best and second-best of ``current_population``;
    a newcomer takes the slot only when its fitness is strictly larger.

    Fitness is compared as a scalar (first row of the ``fitness`` column).
    The previous version compared the two ``fitness`` columns directly, which
    yields a Series and cannot be used as an ``if`` condition.

    Args:
        prev_population: List of individuals; modified in place.
        current_population: List of candidate individuals.

    Returns:
        ``prev_population`` (the same list object).
    """
    # NOTE(review): assumes both helpers return indexes that are valid for the
    # lists passed in here - confirm.
    worst_indexes = get_worst_elements(prev_population)
    best_indexes = get_best_elements(current_population)
    for worst, best in zip(worst_indexes, best_indexes):
        challenger = current_population[best]
        incumbent = prev_population[worst]
        if challenger["fitness"].iloc[0] > incumbent["fitness"].iloc[0]:
            prev_population[worst] = challenger
    return prev_population


def replace_population(prev_population, current_population, mode):
    """Dispatch to the replacement scheme named by ``mode``.

    Args:
        prev_population: Individuals of the previous generation.
        current_population: Individuals of the new generation.
        mode: ``"generational"`` uses ``generational_replacement``; any other
            value uses ``stationary_replacement``.

    Returns:
        Whatever the selected replacement function returns.
    """
    if mode == "generational":
        strategy = generational_replacement
    else:
        strategy = stationary_replacement
    return strategy(prev_population, current_population)


def evaluate_population(population, data, cores=4):
    """Evaluate every individual using a pool of worker processes.

    Args:
        population: List of individuals to evaluate.
        data: Distance table forwarded to ``evaluate_individual``.
        cores: Number of worker processes in the pool.

    Returns:
        List with the result of ``evaluate_individual`` for each individual,
        as returned by ``Pool.map``.
    """
    with Pool(cores) as workers:
        return workers.map(partial(evaluate_individual, data=data), population)


def select_new_population(population, n, m, mode):
    """Select parents through repeated tournaments.

    Args:
        population: List of individuals to select from.
        n: Number of parents selected in ``"generational"`` mode.
        m: Forwarded to ``tournament_selection``.
        mode: ``"generational"`` selects ``n`` parents; any other value
            selects two.

    Returns:
        List with the selected parents.
    """
    tournaments = n if mode == "generational" else 2
    return [tournament_selection(m, population) for _ in range(tournaments)]


def genetic_algorithm(n, m, data, mode, max_iterations=100000):
    """Run the genetic algorithm loop.

    Builds and evaluates an initial population of ``n`` random individuals,
    then performs parent selection once per iteration.

    NOTE(review): the loop currently stops after selection - the selected
    parents are not crossed, mutated or used for replacement - and the
    function returns None. Looks unfinished; confirm.

    Args:
        n: Population size; also passed to ``generate_individual`` as the
            range individuals draw their points from.
        m: Number of genes per individual.
        data: Distance table.
        mode: Selection mode forwarded to ``select_new_population``.
        max_iterations: Number of iterations of the main loop.
    """
    initial_population = [generate_individual(n, m, data) for _ in range(n)]
    population = evaluate_population(initial_population, data)
    for _generation in range(max_iterations):
        select_new_population(population, n, m, mode)


if __name__ == "__main__":
    # Guarded entry point: multiprocessing workers started with the "spawn"
    # method import the main module, and unguarded top-level code would parse
    # the file and launch the whole search again inside every worker.
    n, m, data = parse_file("data/GKD-c_11_n500_m50.txt")
    genetic_algorithm(n=10, m=5, data=data, mode="generational", max_iterations=1)
Fix matching genes selection 2021-06-19 20:22:39 +02:00			`from numpy import sum, append, arange, delete, intersect1d`
Remove deprecated code 2021-06-17 19:25:16 +02:00			`from numpy.random import randint, choice, shuffle`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`from pandas import DataFrame`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`from math import ceil`
Add population evaluation with multiprocessing 2021-06-18 18:54:34 +02:00			`from functools import partial`
			`from multiprocessing import Pool`

			`from preprocessing import parse_file`
Add files from previous lab 2021-04-29 12:33:46 +02:00

Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`def get_row_distance(source, destination, data):`
			`row = data.query(`
			`"""(source == @source and destination == @destination) or \`
			`(source == @destination and destination == @source)"""`
			`)`
			`return row["distance"].values[0]`


Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def compute_distance(element, individual, data):`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`accumulator = 0`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`distinct_elements = individual.query(f"point != {element}")`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`for _, item in distinct_elements.iterrows():`
			`accumulator += get_row_distance(`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`source=element, destination=item.point, data=data`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`)`
			`return accumulator`


Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def generate_individual(n, m, data):`
			`individual = DataFrame(columns=["point", "distance", "fitness"])`
			`individual["point"] = choice(n, size=m, replace=False)`
			`individual["distance"] = individual["point"].apply(`
			`func=compute_distance, individual=individual, data=data`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`)`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`return individual`
Rename algorithms in main module 2021-05-10 19:25:06 +02:00

Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def evaluate_individual(individual, data):`
Implement element evaluation 2021-05-17 20:42:17 +02:00			`fitness = []`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`genotype = individual.point.values`
Refactor genotype obtention into a function 2021-05-17 20:50:26 +02:00			`distances = data.query(f"source in @genotype and destination in @genotype")`
			`for item in genotype[:-1]:`
Implement element evaluation 2021-05-17 20:42:17 +02:00			`element_df = distances.query(f"source == {item} or destination == {item}")`
			`max_distance = element_df["distance"].astype(float).max()`
			`fitness = append(arr=fitness, values=max_distance)`
			`distances = distances.query(f"source != {item} and destination != {item}")`
Add population evaluation with multiprocessing 2021-06-18 18:54:34 +02:00			`individual["fitness"] = sum(fitness)`
			`return individual`
Add files from previous lab 2021-04-29 12:33:46 +02:00

Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def select_distinct_genes(matching_genes, parents, m):`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`first_parent = parents[0].query("point not in @matching_genes")`
			`second_parent = parents[1].query("point not in @matching_genes")`
			`cutoff = randint(len(first_parent.point.values))`
			`first_parent_genes = first_parent.point.values[cutoff:]`
			`second_parent_genes = second_parent.point.values[:cutoff]`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`return first_parent_genes, second_parent_genes`


Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def select_random_genes(matching_genes, parents, m):`
			`random_parent = parents[randint(len(parents))]`
			`distinct_indexes = delete(arange(m), matching_genes)`
			`genes = random_parent.point.iloc[distinct_indexes].values`
			`shuffle(genes)`
			`return genes`


Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`def repair_offspring(offspring, parents, m):`
			`while len(offspring) != m:`
			`if len(offspring) > m:`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`best_index = offspring["distance"].idxmax()`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`offspring.drop(index=best_index, inplace=True)`
			`elif len(offspring) < m:`
			`random_parent = parents[randint(len(parents))]`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`while True:`
			`best_index = random_parent["distance"].idxmax()`
			`best_point = random_parent["point"].loc[best_index]`
			`random_parent.drop(index=best_index, inplace=True)`
			`if not any(offspring["point"].isin([best_point])):`
			`break`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`offspring = offspring.append(`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`{"point": best_point, "distance": 0, "fitness": 0}, ignore_index=True`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`)`
			`return offspring`


			`def get_matching_genes(parents):`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`first_parent = parents[0].point.values`
			`second_parent = parents[1].point.values`
Fix matching genes selection 2021-06-19 20:22:39 +02:00			`return intersect1d(first_parent, second_parent)`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00

Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def populate_offspring(values):`
Add fitness column to individual 2021-06-17 22:45:42 +02:00			`offspring = DataFrame(columns=["point", "distance", "fitness"])`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`for element in values:`
Add fitness column to individual 2021-06-17 22:45:42 +02:00			`aux = DataFrame(columns=["point", "distance", "fitness"])`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`aux["point"] = element`
			`offspring = offspring.append(aux)`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`offspring["distance"] = 0`
Add fitness column to individual 2021-06-17 22:45:42 +02:00			`offspring["fitness"] = 0`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`offspring = offspring[1:]`
			`return offspring`


			`def uniform_crossover(parents, m):`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`matching_genes = get_matching_genes(parents)`
Implement position crossover operator 2021-05-25 16:53:59 +02:00			`first_genes, second_genes = select_distinct_genes(matching_genes, parents, m)`
			`offspring = populate_offspring(values=[matching_genes, first_genes, second_genes])`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`viable_offspring = repair_offspring(offspring, parents, m)`
			`return viable_offspring`


Implement position crossover operator 2021-05-25 16:53:59 +02:00			`def position_crossover(parents, m):`
			`matching_genes = get_matching_genes(parents)`
			`shuffled_genes = select_random_genes(matching_genes, parents, m)`
Return 2 offsprings in the position crossover 2021-06-17 22:45:14 +02:00			`first_offspring = populate_offspring(values=[matching_genes, shuffled_genes])`
			`second_offspring = populate_offspring(values=[matching_genes, shuffled_genes])`
			`return [first_offspring, second_offspring]`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00

			`def crossover(mode, parents, m):`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`split_parents = [parents[i : i + 2] for i in range(0, len(parents), 2)]`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00			`if mode == "uniform":`
Fix uniform crossover operator 2021-06-19 19:13:14 +02:00			`crossover_func = partial(uniform_crossover, m=m)`
			`else:`
			`crossover_func = partial(position_crossover, m=m)`
			`offspring = [*map(crossover_func, split_parents)]`
			`return offspring`
Implement uniform crossover operator 2021-05-24 18:17:40 +02:00

Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`def element_in_dataframe(individual, element):`
			`duplicates = individual.query(f"point == {element}")`
Implement binary tournament selection operator 2021-05-31 18:24:20 +02:00			`return not duplicates.empty`


Implement mutation operator 2021-06-17 19:15:50 +02:00			`def select_new_gene(individual, n):`
			`while True:`
			`new_gene = randint(n)`
Replace solution instances with individual 2021-06-17 22:44:39 +02:00			`if not element_in_dataframe(individual=individual, element=new_gene):`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`return new_gene`


			`def mutate(population, n, probability=0.001):`
			`expected_mutations = len(population) * n * probability`
			`individuals = []`
			`genes = []`
			`for _ in range(ceil(expected_mutations)):`
			`individuals.append(randint(n))`
Remove deprecated code 2021-06-17 19:25:16 +02:00			`current_individual = individuals[-1]`
			`genes.append(population[current_individual].sample().index)`
Implement mutation operator 2021-06-17 19:15:50 +02:00			`for ind, gen in zip(individuals, genes):`
			`individual = population[ind]`
			`individual["point"].iloc[gen] = select_new_gene(individual, n)`
			`individual["distance"].iloc[gen] = 0`
			`return population`
Implement mutation operator 2021-05-31 18:12:23 +02:00

Implement population selection 2021-06-18 19:33:26 +02:00			`def tournament_selection(m, population):`
			`individuals = [population[randint(m)] for _ in range(2)]`
			`best_index = population.index(max(population, key=lambda x: all(x.fitness)))`
			`return individuals[best_index]`
Add files from previous lab 2021-04-29 12:33:46 +02:00

Implement the generational replacement operator 2021-06-17 22:45:59 +02:00			`def generational_replacement(previous_population, current_population):`
			`new_population = current_population`
Fix max and min selection according to fitness 2021-06-18 20:06:59 +02:00			`best_previous_individual = max(previous_population, key=lambda x: all(x.fitness))`
Implement the generational replacement operator 2021-06-17 22:45:59 +02:00			`if best_previous_individual not in new_population:`
Fix max and min selection according to fitness 2021-06-18 20:06:59 +02:00			`worst_index = new_population.index(`
			`min(new_population, key=lambda x: all(x.fitness))`
			`)`
Implement the generational replacement operator 2021-06-17 22:45:59 +02:00			`new_population[worst_index] = best_previous_individual`
			`return new_population`


Implement the stationary replacement operator 2021-06-17 23:03:03 +02:00			`def get_best_elements(population):`
Fix max and min selection according to fitness 2021-06-18 20:06:59 +02:00			`first_index = population.index(max(population, key=lambda x: all(x.fitness)))`
Implement the stationary replacement operator 2021-06-17 23:03:03 +02:00			`population.pop(first_index)`
Fix max and min selection according to fitness 2021-06-18 20:06:59 +02:00			`second_index = population.index(max(population, key=lambda x: all(x.fitness)))`
Implement the stationary replacement operator 2021-06-17 23:03:03 +02:00			`return first_index, second_index`


			`def get_worst_elements(population):`
Fix max and min selection according to fitness 2021-06-18 20:06:59 +02:00			`first_index = population.index(min(population, key=lambda x: all(x.fitness)))`
Implement the stationary replacement operator 2021-06-17 23:03:03 +02:00			`population.pop(first_index)`
Fix max and min selection according to fitness 2021-06-18 20:06:59 +02:00			`second_index = population.index(min(population, key=lambda x: all(x.fitness)))`
Implement the stationary replacement operator 2021-06-17 23:03:03 +02:00			`return first_index, second_index`


			`def stationary_replacement(prev_population, current_population):`
			`new_population = prev_population`
			`worst_indexes = get_worst_elements(prev_population)`
			`best_indexes = get_best_elements(current_population)`
			`for worst, best in zip(worst_indexes, best_indexes):`
			`if current_population[best].fitness > prev_population[worst].fitness:`
			`new_population[worst] = current_population[best]`
Implement the generational replacement operator 2021-06-17 22:45:59 +02:00			`return new_population`


Implement the stationary replacement operator 2021-06-17 23:03:03 +02:00			`def replace_population(prev_population, current_population, mode):`
Implement the generational replacement operator 2021-06-17 22:45:59 +02:00			`if mode == "generational":`
Implement the stationary replacement operator 2021-06-17 23:03:03 +02:00			`return generational_replacement(prev_population, current_population)`
			`return stationary_replacement(prev_population, current_population)`
Implement the generational replacement operator 2021-06-17 22:45:59 +02:00

Add population evaluation with multiprocessing 2021-06-18 18:54:34 +02:00			`def evaluate_population(population, data, cores=4):`
			`fitness_func = partial(evaluate_individual, data=data)`
			`with Pool(cores) as pool:`
			`evaluated_population = pool.map(fitness_func, population)`
			`return evaluated_population`


Implement population selection 2021-06-18 19:33:26 +02:00			`def select_new_population(population, n, m, mode):`
			`if mode == "generational":`
			`parents = [tournament_selection(m, population) for _ in range(n)]`
			`else:`
			`parents = [tournament_selection(m, population) for _ in range(2)]`
			`return parents`


Add population evaluation with multiprocessing 2021-06-18 18:54:34 +02:00			`def genetic_algorithm(n, m, data, mode, max_iterations=100000):`
Implement the generational replacement operator 2021-06-17 22:45:59 +02:00			`population = [generate_individual(n, m, data) for _ in range(n)]`
Add population evaluation with multiprocessing 2021-06-18 18:54:34 +02:00			`population = evaluate_population(population, data)`
			`for _ in range(max_iterations):`
Implement population selection 2021-06-18 19:33:26 +02:00			`parents = select_new_population(population, n, m, mode)`
Add population evaluation with multiprocessing 2021-06-18 18:54:34 +02:00

			`n, m, data = parse_file("data/GKD-c_11_n500_m50.txt")`
Implement population selection 2021-06-18 19:33:26 +02:00			`genetic_algorithm(n=10, m=5, data=data, mode="generational", max_iterations=1)`