diff --git a/src/model.py b/src/model.py
index 6c43f2c..c73104a 100644
--- a/src/model.py
+++ b/src/model.py
@@ -1,28 +1,39 @@
+from random import seed
+
 from tensorflow.keras import Model, Sequential, layers
+from tensorflow.keras.callbacks import TensorBoard
+from tensorflow.keras.losses import sparse_categorical_crossentropy
+from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.regularizers import l2
+from tensorflow.random import set_seed
 
-from constants import BASES
+from constants import BASES, EPOCHS, L2, LEARNING_RATE, LOG_DIR
+from preprocessing import dataset_creation
 
 
-def build_model(hyper_parameters) -> Model:
+def build_model() -> Model:
     """
-    Builds the CNN model
+    Build and compile the CNN model
+
+    Two Conv1D + MaxPool1D blocks are followed by two L2-regularized dense
+    layers with dropout and a softmax output with one unit per entry of BASES.
+    The model is compiled with Adam and sparse categorical cross-entropy.
     """
-    return Sequential(
+    model = Sequential(
         [
             # Two convolutions + maxpooling blocks
             layers.Conv1D(
                 filters=16,
                 kernel_size=5,
                 activation="relu",
-                kernel_regularizer=l2(hyper_parameters.l2),
+                kernel_regularizer=l2(L2),
             ),
             layers.MaxPool1D(pool_size=3, strides=1),
             layers.Conv1D(
                 filters=16,
                 kernel_size=3,
                 activation="relu",
-                kernel_regularizer=l2(hyper_parameters.l2),
+                kernel_regularizer=l2(L2),
             ),
             layers.MaxPool1D(pool_size=3, strides=1),
             # Flatten the input volume
@@ -31,16 +42,60 @@ def build_model(hyper_parameters) -> Model:
             layers.Dense(
                 units=16,
                 activation="relu",
-                kernel_regularizer=l2(hyper_parameters.l2),
+                kernel_regularizer=l2(L2),
             ),
             layers.Dropout(rate=0.3),
             layers.Dense(
                 units=16,
                 activation="relu",
-                kernel_regularizer=l2(hyper_parameters.l2),
+                kernel_regularizer=l2(L2),
             ),
             layers.Dropout(rate=0.3),
             # Output layer with softmax activation
             layers.Dense(units=len(BASES), activation="softmax"),
         ]
     )
+    model.compile(
+        optimizer=Adam(LEARNING_RATE),
+        loss=sparse_categorical_crossentropy,
+        metrics=["accuracy"],
+    )
+    return model
+
+
+def show_metrics(model, eval_dataset, test_dataset) -> None:
+    """
+    Print the loss and accuracy of the model on the eval and test datasets
+
+    Assumes model.evaluate returns [loss, accuracy], i.e. the model was
+    compiled with a single "accuracy" metric, as build_model does.
+    """
+    eval_metrics = model.evaluate(eval_dataset, verbose=0)
+    test_metrics = model.evaluate(test_dataset, verbose=0)
+    print(f"Final eval metrics - loss: {eval_metrics[0]} - accuracy: {eval_metrics[1]}")
+    print(f"Final test metrics - loss: {test_metrics[0]} - accuracy: {test_metrics[1]}")
+
+
+def run(data_file, label_file, seed_value=42) -> None:
+    """
+    Create the datasets, build the model, then train and evaluate it
+
+    data_file and label_file are passed to dataset_creation unchanged;
+    seed_value seeds both Python's random module and TensorFlow.
+    """
+    seed(seed_value)
+    # TensorFlow keeps its own global seed, separate from Python's random
+    set_seed(seed_value)
+    train_data, eval_data, test_data = dataset_creation(data_file, label_file)
+    tensorboard = TensorBoard(log_dir=LOG_DIR, histogram_freq=1, profile_batch=0)
+    model = build_model()
+    print("Training the model")
+    model.fit(
+        train_data,
+        epochs=EPOCHS,
+        validation_data=eval_data,
+        callbacks=[tensorboard],
+        verbose=0,
+    )
+    print("Training complete. Obtaining final metrics...")
+    show_metrics(model, eval_data, test_data)