diff --git a/Dissertation.org b/Dissertation.org
index 796063b..78600af 100644
--- a/Dissertation.org
+++ b/Dissertation.org
@@ -1,6 +1,6 @@
 #+TITLE: Machine Learning para corrección de errores en datos de secuenciación de ADN
 #+AUTHOR: Amin Kasrou Aouam
-#+DATE: 26 de Junio de 2021
+#+DATE: Julio de 2021
 #+PANDOC_OPTIONS: template:assets/babathesis.latex
 #+PANDOC_OPTIONS: toc:t
 #+PANDOC_OPTIONS: bibliography:assets/bibliography.bib
@@ -29,6 +29,16 @@ Next generation sequencing (NGS) have revolutionised genomic research. These tec
 
 * Introducción
 
+El ácido desoxirribonucleico (ADN) y el ácido ribonucleico (ARN) son los repositorios moleculares de la información genética. La estructura de cada proteína, y en última instancia de cada biomolécula y componente celular, es producto de la información programada en la secuencia de nucleótidos de una célula. La capacidad de almacenar y transmitir la información genética de una generación a otra es una condición fundamental para la vida. Un segmento de una molécula de ADN que contiene la información necesaria para la síntesis de un producto biológico funcional, ya sea una proteína o un ARN, se denomina gen. El almacenamiento y la transmisión de información biológica son las únicas funciones conocidas del ADN. cite:book:lehninger
+
+Hay muy pocos principios firmes en biología. A menudo se dice, de una forma u otra, que la única regla real es que no hay reglas, es decir, que se pueden encontrar excepciones a cada principio fundamental si se busca lo suficiente. El principio conocido como el Dogma central de la biología molecular parece ser una excepción a esta regla de excepción ubicua. cite:CRICK1970 El dogma central de la biología molecular establece que una vez que la información ha pasado a proteína no puede volver a salir; \ie la transferencia de información de ácido nucleico a ácido nucleico, o de ácido nucleico a proteína puede ser posible, pero la transferencia de proteína a proteína, o de proteína a ácido nucleico es imposible. cite:crick1958protein
+
+#+CAPTION: Dogma central de la biología molecular
+#+ATTR_HTML: :height 25% :width 80%
+#+NAME: fig:central-dogma
+[[./assets/figures/central-dogma.png]]
+
+
 ** Secuenciación de ADN
 
 La secuenciación de ADN es el proceso mediante el cual se determina el orden de los nucleótidos en una secuencia de ADN. En los años 70, Sanger \etal desarrollaron métodos para secuenciar el ADN mediante técnicas de terminación de cadena. cite:Sanger5463 Este avance revolucionó la biología, proporcionando las herramientas necesarias para descifrar genes, y posteriormente, genomas completos. La demanda creciente de un mayor rendimiento llevó a la automatización y paralelización de las tareas de secuenciación. Gracias a estos avances, la técnica de Sanger permitió determinar la primera secuencia del genoma humano en 2004 (Proyecto Genoma Humano). cite:InternationalHumanGenomeSequencingConsortium2004
diff --git a/Dissertation.pdf b/Dissertation.pdf
index f73c5c1..5f79fed 100644
Binary files a/Dissertation.pdf and b/Dissertation.pdf differ
diff --git a/assets/bibliography.bib b/assets/bibliography.bib
index 2bcd75d..497cca4 100644
--- a/assets/bibliography.bib
+++ b/assets/bibliography.bib
@@ -414,3 +414,95 @@
   year = 2017,
   pages = 204
 }
+
+
+
+@Article{CRICK1970,
+  author = {Crick, Francis},
+  title = {Central Dogma of Molecular Biology},
+  journal = {Nature},
+  year = 1970,
+  month = aug,
+  day = 01,
+  volume = 227,
+  number = 5258,
+  pages = {561--563},
+  abstract = {The central dogma of molecular biology deals with the
+              detailed residue-by-residue transfer of sequential
+              information. It states that such information cannot be
+              transferred from protein to either protein or nucleic acid.},
+  issn = {1476-4687},
+  doi = {10.1038/227561a0},
+  url = {https://doi.org/10.1038/227561a0}
+}
+
+@Article{Salk2018,
+  author = {Salk, Jesse J. and Schmitt, Michael W. and Loeb, Lawrence
+            A.},
+  title = {Enhancing the accuracy of next-generation sequencing for
+           detecting rare and subclonal mutations},
+  journal = {Nature Reviews Genetics},
+  year = 2018,
+  month = may,
+  day = 01,
+  volume = 19,
+  number = 5,
+  pages = {269--285},
+  abstract = {The ability to identify low-frequency genetic variants
+              among heterogeneous populations of cells or DNA molecules is
+              important in many fields of basic science, clinical medicine
+              and other applications, yet current high-throughput DNA
+              sequencing technologies have an error rate between 1 per 100
+              and 1 per 1,000 base pairs sequenced, which obscures their
+              presence below this level. As next-generation sequencing
+              technologies evolved over the decade, throughput has improved
+              markedly, but raw accuracy has remained generally unchanged.
+              Researchers with a need for high accuracy developed data
+              filtering methods and incremental biochemical improvements
+              that modestly improve low-frequency variant detection, but
+              background errors remain limiting in many fields. The most
+              profoundly impactful means for reducing errors, first
+              developed approximately 7 years ago, has been the concept of
+              single-molecule consensus sequencing. This entails redundant
+              sequencing of multiple copies of a given specific DNA molecule
+              and discounting of variants that are not present in all or
+              most of the copies as likely errors. Consensus sequencing can
+              be achieved by labelling each molecule with a unique molecular
+              barcode before generating copies, which allows subsequent
+              comparison of these copies or schemes whereby copies are
+              physically joined and sequenced together. Because of
+              trade-offs in cost, time and accuracy, no single method is
+              optimal for every application, and each method should be
+              considered on a case-by-case basis. Major applications for
+              high-accuracy DNA sequencing include non-invasive cancer
+              diagnostics, cancer screening, early detection of cancer
+              relapse or impending drug resistance, infectious disease
+              applications, prenatal diagnostics, forensics and mutagenesis
+              assessment. Future advances in ultra-high-accuracy sequencing
+              are likely to be driven by an emerging generation of
+              single-molecule sequencers, particularly those that allow
+              independent sequence comparison of both strands of native DNA
+              duplexes.},
+  issn = {1471-0064},
+  doi = {10.1038/nrg.2017.117},
+  url = {https://doi.org/10.1038/nrg.2017.117}
+}
+
+@book{book:lehninger,
+  title = {{Lehninger} Principles of Biochemistry},
+  author = {Lehninger, Albert and Nelson, David L. and Cox, Michael M.},
+  publisher = {W. H. Freeman},
+  isbn = {9781429224161},
+  year = 2008,
+  edition = 5,
+  pages = 276
+}
+
+@inproceedings{crick1958protein,
+  title = {On protein synthesis},
+  author = {Crick, Francis H. C.},
+  booktitle = {Symposia of the Society for Experimental Biology},
+  volume = 12,
+  pages = {138--163},
+  year = 1958
+}
diff --git a/assets/figures/central-dogma.png b/assets/figures/central-dogma.png
new file mode 100644
index 0000000..7b3867c
Binary files /dev/null and b/assets/figures/central-dogma.png differ