Compare commits

..

1 Commits

Author SHA1 Message Date
coolneng 34fefed3ed
Add literate programming notebook 2021-05-06 20:44:22 +02:00
1 changed files with 41 additions and 4 deletions

View File

@@ -14,8 +14,8 @@ from tensorflow_io import genome
#+end_src #+end_src
#+RESULTS: #+RESULTS:
: 2021-05-06 20:19:46.143707: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nix/store/9ilyrqidrjbqvmnn8ykjc7lygdd86g7q-gcc-10.2.0-lib/lib: : 2021-05-06 20:41:53.592058: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nix/store/9ilyrqidrjbqvmnn8ykjc7lygdd86g7q-gcc-10.2.0-lib/lib:
: 2021-05-06 20:19:46.143750: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. : 2021-05-06 20:41:53.592101: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
TensorFlow I/O is an extension that contains a module for genome parsing; we'll use it to import the sequences contained in our FASTQ files: TensorFlow I/O is an extension that contains a module for genome parsing; we'll use it to import the sequences contained in our FASTQ files:
@@ -29,7 +29,44 @@ def parse_data(filepath):
Let's import both the immuneSIM-generated HVR dataset and the CuReSim-processed one, which contains sequencing errors (mostly indels): Let's import both the immuneSIM-generated HVR dataset and the CuReSim-processed one, which contains sequencing errors (mostly indels):
#+begin_src python #+begin_src python
original_HVR, _ = parse_data("data/HVR.fastq") original_HVR, _ = parse_data("../data/HVR.fastq")
processed_HVR, _ = parse_data("data/CuReSim_HVR.fastq") processed_HVR, _ = parse_data("../data/CuReSim-HVR.fastq")
print(original_HVR) print(original_HVR)
print(processed_HVR)
#+end_src #+end_src
#+RESULTS:
#+begin_example
tf.Tensor(
[b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'], shape=(10,), dtype=string)
tf.Tensor(
[b'GCGCCAGCAGCTATTGGATATGGACTAGCTACTC'
b'TGTGCCAGCAGTGATGTGGTGACATGGGTGCGTAGCAATCAGCCAGCATG'
b'GCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGTGACAGGGTGCGTAGCATCAGCCCCAGCATTT'
b'TTGCGCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
b'TTGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCAGCATTT'
b'TTGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'], shape=(20,), dtype=string)
#+end_example