from tensorflow.keras import Model, Sequential, layers
from tensorflow.keras.regularizers import l2

from preprocessing import BASES


def build_model(hyper_parameters) -> Model:
    """
    Assembles the sequential 1D convolutional classifier.

    The stack consists of two Conv1D + MaxPool1D blocks, a Flatten
    layer, two Dense + Dropout blocks and a softmax output layer with
    one unit per entry of BASES. Each Conv1D layer and each hidden Dense
    layer receives its own L2 kernel regularizer whose factor is read
    from ``hyper_parameters.l2``.
    """

    def weight_penalty():
        # A separate regularizer object per layer, as in a literal layer list
        return l2(hyper_parameters.l2)

    def conv_pool(window):
        # One convolution followed by an overlapping (stride 1) max-pooling
        convolution = layers.Conv1D(
            filters=16,
            kernel_size=window,
            activation="relu",
            kernel_regularizer=weight_penalty(),
        )
        pooling = layers.MaxPool1D(pool_size=3, strides=1)
        return convolution, pooling

    def dense_drop():
        # One fully connected layer followed by its dropout layer
        hidden = layers.Dense(
            units=16,
            activation="relu",
            kernel_regularizer=weight_penalty(),
        )
        dropout = layers.Dropout(rate=0.3)
        return hidden, dropout

    # Layers are instantiated strictly in network order (list elements are
    # evaluated left to right), so they are created in the same sequence as
    # they appear in the model.
    stack = [
        *conv_pool(window=5),
        *conv_pool(window=3),
        # Collapse the feature maps into a vector for the dense part
        layers.Flatten(),
        *dense_drop(),
        *dense_drop(),
        # Softmax output, one unit per base
        layers.Dense(units=len(BASES), activation="softmax"),
    ]
    return Sequential(stack)