diff --git a/docs/experiments.org b/docs/experiments.org deleted file mode 100644 index edad193..0000000 --- a/docs/experiments.org +++ /dev/null @@ -1,72 +0,0 @@ -#+TITLE: Tensorflow experiments -#+AUTHOR: Amin Kasrou Aouam -#+PROPERTY: header-args :session poetry-session -* Experiments - -#+begin_src elisp :results silent -(pyvenv-activate "~/.cache/pypoetry/virtualenvs/locimend-hM_4JND0-py3.8/") -#+end_src - -In this notebook we'll extract knowledge from our generated dataset. First, let's import our dependencies: - -#+begin_src python -from tensorflow_io import genome -#+end_src - -#+RESULTS: -: 2021-05-06 20:41:53.592058: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nix/store/9ilyrqidrjbqvmnn8ykjc7lygdd86g7q-gcc-10.2.0-lib/lib: -: 2021-05-06 20:41:53.592101: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. - - -Tensorflow I/O is an extension that contains a module for genome parsing, we'll use it to import the sequences contained in our FASTQ files: - -#+begin_src python :results silent -def parse_data(filepath): - HVR = genome.read_fastq(filename=filepath) - return HVR.sequences, HVR.raw_quality -#+end_src - -Let's import both the immuneSIM generated HVR dataset and the CuReSim processed one, which contains sequencing errors (mostly indels): - -#+begin_src python -original_HVR, _ = parse_data("../data/HVR.fastq") -processed_HVR, _ = parse_data("../data/CuReSim-HVR.fastq") -print(original_HVR) -print(processed_HVR) -#+end_src - -#+RESULTS: -#+begin_example -tf.Tensor( -[b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG' - b'TGTGCCAGCAGCTTAACCATCGGACGCAGTACTTCGGGCCAGGCACGCGGCTCCTGG'], shape=(10,), dtype=string) -tf.Tensor( -[b'GCGCCAGCAGCTATTGGATATGGACTAGCTACTC' - b'TGTGCCAGCAGTGATGTGGTGACATGGGTGCGTAGCAATCAGCCAGCATG' - b'GCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGTGACAGGGTGCGTAGCATCAGCCCCAGCATTT' - b'TTGCGCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT' - b'TTGCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT' - b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCAGCATTT' - b'TTGCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT' - b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT' - b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT' - b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT' - b'TGCGCCAGCAGCTTGGATAGGACTAGCTACTT' - b'TGTGCCAGCAGTGAATGGGGACAGGGGCGTAGCAATCAGCCCCAGCATTT'], shape=(20,), dtype=string) -#+end_example