Perform one-hot encoding on the sequences
This commit is contained in:
parent
e9582d0883
commit
07776a0538
|
@ -3,8 +3,7 @@ from typing import Dict, List, Tuple
|
|||
from Bio.pairwise2 import align
|
||||
from Bio.SeqIO import parse
|
||||
from numpy.random import random
|
||||
from tensorflow import Tensor, int64
|
||||
from tensorflow.data import TFRecordDataset
|
||||
from tensorflow import Tensor, int64, one_hot
|
||||
from tensorflow.data import AUTOTUNE, TFRecordDataset
|
||||
from tensorflow.io import TFRecordWriter, VarLenFeature, parse_single_example
|
||||
from tensorflow.sparse import to_dense
|
||||
|
@ -84,6 +83,7 @@ def transform_features(parsed_features) -> Dict[str, Tensor]:
|
|||
sparse_features = ["sequence", "label"]
|
||||
for element in sparse_features:
|
||||
features[element] = to_dense(parsed_features[element])
|
||||
features[element] = one_hot(features[element], depth=len(BASES))
|
||||
return features
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue