Rename reference_sequence to label
This commit is contained in:
parent
035162bd8d
commit
38903c5737
|
@@ -11,7 +11,7 @@ from tensorflow.train import Example, Feature, Features, FloatList, Int64List
|
|||
from constants import *
|
||||
|
||||
|
||||
def generate_example(sequence, reference_sequence, weight_matrix) -> bytes:
|
||||
def generate_example(sequence, label, weight_matrix) -> bytes:
|
||||
"""
|
||||
Create a binary-string for each sequence containing the sequence and the bases' frequency
|
||||
"""
|
||||
|
@@ -19,9 +19,7 @@ def generate_example(sequence, reference_sequence, weight_matrix) -> bytes:
|
|||
"sequence": Feature(
|
||||
int64_list=Int64List(value=list(encode_sequence(sequence)))
|
||||
),
|
||||
"reference_sequence": Feature(
|
||||
int64_list=Int64List(value=list(encode_sequence(reference_sequence)))
|
||||
),
|
||||
"label": Feature(int64_list=Int64List(value=list(encode_sequence(label)))),
|
||||
"A_counts": Feature(float_list=FloatList(value=weight_matrix["A"])),
|
||||
"C_counts": Feature(float_list=FloatList(value=weight_matrix["C"])),
|
||||
"G_counts": Feature(float_list=FloatList(value=weight_matrix["G"])),
|
||||
|
@@ -49,14 +47,14 @@ def read_fastq(data_file, label_file) -> List[bytes]:
|
|||
motifs = create([element.seq])
|
||||
example = generate_example(
|
||||
sequence=str(element.seq),
|
||||
reference_sequence=str(label.seq),
|
||||
label=str(label.seq),
|
||||
weight_matrix=motifs.pwm,
|
||||
)
|
||||
examples.append(example)
|
||||
return examples
|
||||
|
||||
|
||||
def create_dataset(filepath) -> None:
|
||||
def create_dataset(data_file, label_file) -> None:
|
||||
"""
|
||||
Create a training and test dataset with a 70/30 split respectively
|
||||
"""
|
||||
|
@@ -76,7 +74,7 @@ def process_input(byte_string) -> Example:
|
|||
"""
|
||||
schema = {
|
||||
"sequence": FixedLenFeature(shape=[], dtype=int64),
|
||||
"reference_sequence": FixedLenFeature(shape=[], dtype=int64),
|
||||
"label": FixedLenFeature(shape=[], dtype=int64),
|
||||
"A_counts": FixedLenFeature(shape=[], dtype=float32),
|
||||
"C_counts": FixedLenFeature(shape=[], dtype=float32),
|
||||
"G_counts": FixedLenFeature(shape=[], dtype=float32),
|
||||
|
|
Loading…
Reference in New Issue