Perform one hot encoding on the sequences

This commit is contained in:
coolneng 2021-06-25 00:04:01 +02:00
parent e9582d0883
commit 1237394bb1
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
1 changed files with 3 additions and 3 deletions

View File

@ -3,8 +3,7 @@ from typing import Dict, List, Tuple
from Bio.pairwise2 import align
from Bio.SeqIO import parse
from numpy.random import random
from tensorflow import Tensor, int64
from tensorflow.data import TFRecordDataset
from tensorflow import Tensor, int64, one_hot
from tensorflow.data import AUTOTUNE, TFRecordDataset
from tensorflow.io import TFRecordWriter, VarLenFeature, parse_single_example
from tensorflow.sparse import to_dense
@ -78,12 +77,13 @@ def create_dataset(data_file, label_file, dataset_split=[0.8, 0.1, 0.1]) -> None
def transform_features(parsed_features) -> Dict[str, Tensor]:
"""
Transform the parsed features of an Example into a list of dense Tensors
Transform the parsed features of an Example into a list of dense one hot encoded Tensors
"""
features = {}
sparse_features = ["sequence", "label"]
for element in sparse_features:
features[element] = to_dense(parsed_features[element])
features[element] = one_hot(features[element], depth=len(BASES))
return features