diff --git a/src/constants.py b/src/constants.py
index 1aaf633..a0c9606 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -3,7 +3,7 @@ TRAIN_DATASET = "data/train_data.tfrecords"
 TEST_DATASET = "data/test_data.tfrecords"
 EVAL_DATASET = "data/eval_data.tfrecords"
 EPOCHS = 1000
-BATCH_SIZE = 256
+BATCH_SIZE = 1
 LEARNING_RATE = 0.004
 L2 = 0.001
 LOG_DIR = "logs"
diff --git a/src/model.py b/src/model.py
index 8ef83ec..364ae3c 100644
--- a/src/model.py
+++ b/src/model.py
@@ -1,8 +1,9 @@
 from random import seed
-from tensorflow.keras import Model, Sequential, layers
+from tensorflow.keras import Model, Sequential
+from tensorflow.keras.layers import *
 from tensorflow.keras.callbacks import TensorBoard
-from tensorflow.keras.losses import sparse_categorical_crossentropy
+from tensorflow.keras.losses import categorical_crossentropy
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.regularizers import l2
 from tensorflow.random import set_seed
 
@@ -15,47 +16,28 @@ def build_model() -> Model:
     """
     Build the CNN model
     """
-    model = Sequential()
-    model.add(
-        layers.Conv1D(
-            filters=16,
-            kernel_size=5,
-            activation="relu",
-            kernel_regularizer=l2(L2),
-        )
+    model = Sequential(
+        [
+            Input(shape=(None, len(BASES))),
+            Conv1D(
+                filters=16, kernel_size=5, activation="relu", kernel_regularizer=l2(L2)
+            ),
+            MaxPool1D(pool_size=3, strides=1),
+            Conv1D(
+                filters=16, kernel_size=3, activation="relu", kernel_regularizer=l2(L2)
+            ),
+            MaxPool1D(pool_size=3, strides=1),
+            GlobalAveragePooling1D(),
+            Dense(units=16, activation="relu", kernel_regularizer=l2(L2)),
+            Dropout(rate=0.3),
+            Dense(units=16, activation="relu", kernel_regularizer=l2(L2)),
+            Dropout(rate=0.3),
+            Dense(units=len(BASES), activation="softmax"),
+        ]
     )
-    model.add(layers.MaxPool1D(pool_size=3, strides=1))
-    model.add(
-        layers.Conv1D(
-            filters=16,
-            kernel_size=3,
-            activation="relu",
-            kernel_regularizer=l2(L2),
-        )
-    )
-    model.add(layers.MaxPool1D(pool_size=3, strides=1))
-    model.add(layers.Flatten())
-    model.add(
-        layers.Dense(
-            units=16,
-            activation="relu",
-            kernel_regularizer=l2(L2),
-        )
-    )
-    model.add(layers.Dropout(rate=0.3))
-    model.add(
-        layers.Dense(
-            units=16,
-            activation="relu",
-            kernel_regularizer=l2(L2),
-        )
-    )
-    model.add(layers.Dropout(rate=0.3))
-    # FIXME Change output size
-    model.add(layers.Dense(units=len(BASES), activation="softmax"))
     model.compile(
         optimizer=Adam(LEARNING_RATE),
-        loss=sparse_categorical_crossentropy,
+        loss=categorical_crossentropy,
         metrics=["accuracy"],
     )
     return model
diff --git a/src/preprocessing.py b/src/preprocessing.py
index 0b0c52a..756300f 100644
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@@ -50,10 +50,7 @@ def read_fastq(data_file, label_file) -> List[bytes]:
     examples = []
     with open(data_file) as data, open(label_file) as labels:
         for element, label in zip(parse(data, "fastq"), parse(labels, "fastq")):
-            example = generate_example(
-                sequence=str(element.seq),
-                label=str(label.seq),
-            )
+            example = generate_example(sequence=str(element.seq), label=str(label.seq))
             examples.append(example)
     return examples
 
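
A minimal sketch to sanity-check the rebuilt model, under two assumptions not shown
in the patch: that BASES is the four-letter A/C/G/T alphabet imported from
src/constants.py, and that inputs are one-hot encoded reads. The switch from
sparse_categorical_crossentropy to categorical_crossentropy means labels must now
be one-hot vectors rather than integer class ids:

    import numpy as np
    from src.model import build_model

    N_BASES = 4  # assumed: len(BASES) for the A/C/G/T alphabet

    model = build_model()
    model.summary()  # Input(shape=(None, N_BASES)) leaves the read length open

    # One one-hot-encoded read of length 100 plus a one-hot label of width
    # N_BASES, as categorical_crossentropy expects (the removed sparse loss
    # took integer class ids instead).
    x = np.zeros((1, 100, N_BASES), dtype="float32")
    x[0, np.arange(100), np.random.randint(0, N_BASES, 100)] = 1.0
    y = np.zeros((1, N_BASES), dtype="float32")
    y[0, 0] = 1.0
    model.train_on_batch(x, y)

Because the Input shape is (None, len(BASES)) and GlobalAveragePooling1D replaces
Flatten, the network accepts reads of any length; only the batch dimension and the
one-hot width are fixed.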