#+TITLE: TensorFlow experiments
#+AUTHOR: Amin Kasrou Aouam
#+PROPERTY: header-args :session poetry-session

* Experiments

#+begin_src elisp :results silent
(pyvenv-activate "~/.cache/pypoetry/virtualenvs/locimend-hM_4JND0-py3.8/")
#+end_src

In this notebook we'll extract knowledge from our generated dataset.

First, let's import our dependencies:

#+begin_src python
from tensorflow_io import genome
#+end_src

#+RESULTS:
: 2021-05-06 20:41:53.592058: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nix/store/9ilyrqidrjbqvmnn8ykjc7lygdd86g7q-gcc-10.2.0-lib/lib:
: 2021-05-06 20:41:53.592101: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.

TensorFlow I/O is an extension that contains a module for genome parsing; we'll use it to import the sequences contained in our FASTQ files:

#+begin_src python :results silent
def parse_data(filepath):
    HVR = genome.read_fastq(filename=filepath)
    return HVR.sequences, HVR.raw_quality
#+end_src

Let's import both the immuneSIM-generated HVR dataset and the CuReSim-processed one, which contains sequencing errors (mostly indels):

#+begin_src python
original_HVR, _ = parse_data("../data/HVR.fastq")
processed_HVR, _ = parse_data("../data/CuReSim-HVR.fastq")
print(original_HVR)
print(processed_HVR)
#+end_src

#+RESULTS:
#+begin_example
tf.Tensor(
[b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'
 b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'], shape=(10,), dtype=string)
tf.Tensor(
[b'GCGCCAGCAGCTATTGGATATGGACTAGCTACTC'
 b'TGTGCCAGCAGTGATGTGGTGACATGGGTGCGTAGCAATCAGCCAGCATG'
 b'GCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGTGACAGGGTGCGTAGCATCAGCCCCAGCATTT'
 b'TTGCGCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
 b'TTGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
 b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCAGCATTT'
 b'TTGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
 b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
 b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
 b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'
 b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT'
 b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'], shape=(20,), dtype=string)
#+end_example