Perform one hot encoding on the sequences
This commit is contained in:
parent
e9582d0883
commit
1237394bb1
|
@ -3,8 +3,7 @@ from typing import Dict, List, Tuple
|
||||||
from Bio.pairwise2 import align
|
from Bio.pairwise2 import align
|
||||||
from Bio.SeqIO import parse
|
from Bio.SeqIO import parse
|
||||||
from numpy.random import random
|
from numpy.random import random
|
||||||
from tensorflow import Tensor, int64
|
from tensorflow import Tensor, int64, one_hot
|
||||||
from tensorflow.data import TFRecordDataset
|
|
||||||
from tensorflow.data import AUTOTUNE, TFRecordDataset
|
from tensorflow.data import AUTOTUNE, TFRecordDataset
|
||||||
from tensorflow.io import TFRecordWriter, VarLenFeature, parse_single_example
|
from tensorflow.io import TFRecordWriter, VarLenFeature, parse_single_example
|
||||||
from tensorflow.sparse import to_dense
|
from tensorflow.sparse import to_dense
|
||||||
|
@ -78,12 +77,13 @@ def create_dataset(data_file, label_file, dataset_split=[0.8, 0.1, 0.1]) -> None
|
||||||
|
|
||||||
def transform_features(parsed_features) -> Dict[str, Tensor]:
|
def transform_features(parsed_features) -> Dict[str, Tensor]:
|
||||||
"""
|
"""
|
||||||
Transform the parsed features of an Example into a list of dense Tensors
|
Transform the parsed features of an Example into a list of dense one hot encoded Tensors
|
||||||
"""
|
"""
|
||||||
features = {}
|
features = {}
|
||||||
sparse_features = ["sequence", "label"]
|
sparse_features = ["sequence", "label"]
|
||||||
for element in sparse_features:
|
for element in sparse_features:
|
||||||
features[element] = to_dense(parsed_features[element])
|
features[element] = to_dense(parsed_features[element])
|
||||||
|
features[element] = one_hot(features[element], depth=len(BASES))
|
||||||
return features
|
return features
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue