2021-06-26 18:05:40 +02:00
|
|
|
|
% Journal article: Flagel, Brandvain and Schrider, Molecular Biology and Evolution 36(2).
% The eprint field holds the publisher's PDF link exactly as exported.
@article{10.1093/molbev/msy224,
  author   = {Flagel, Lex and Brandvain, Yaniv and Schrider, Daniel R.},
  title    = {The Unreasonable Effectiveness of Convolutional Neural
              Networks in Population Genetic Inference},
  journal  = {Molecular Biology and Evolution},
  volume   = 36,
  number   = 2,
  pages    = {220--238},
  year     = 2018,
  month    = dec,
  abstract = {Population-scale genomic data sets have given researchers
              incredible amounts of information from which to infer
              evolutionary histories. Concomitant with this flood of data,
              theoretical and methodological advances have sought to extract
              information from genomic sequences to infer demographic events
              such as population size changes and gene flow among closely
              related populations/species, construct recombination maps, and
              uncover loci underlying recent adaptation. To date, most
              methods make use of only one or a few summaries of the input
              sequences and therefore ignore potentially useful information
              encoded in the data. The most sophisticated of these
              approaches involve likelihood calculations, which require
              theoretical advances for each new problem, and often focus on
              a single aspect of the data (e.g., only allele frequency
              information) in the interest of mathematical and computational
              tractability. Directly interrogating the entirety of the input
              sequence data in a likelihood-free manner would thus offer a
              fruitful alternative. Here, we accomplish this by representing
              DNA sequence alignments as images and using a class of deep
              learning methods called convolutional neural networks (CNNs)
              to make population genetic inferences from these images. We
              apply CNNs to a number of evolutionary questions and find that
              they frequently match or exceed the accuracy of current
              methods. Importantly, we show that CNNs perform accurate
              evolutionary model selection and parameter estimation, even on
              problems that have not received detailed theoretical
              treatments. Thus, when applied to population genetic
              alignments, CNNs are capable of outperforming expert-derived
              statistical methods and offer a new path forward in cases
              where no likelihood approach exists.},
  issn     = {0737-4038},
  doi      = {10.1093/molbev/msy224},
  url      = {https://doi.org/10.1093/molbev/msy224},
  eprint   = {https://academic.oup.com/mbe/article-pdf/36/2/220/27736968/msy224.pdf},
}
|
|
|
|
|
|
2021-06-27 18:21:28 +02:00
|
|
|
|
% Journal article: Robins et al., Blood 114(19), 2009.
% NOTE(review): "beta"/"alphabeta" in the exported title were rendered as Greek letters;
% confirm against the published title.
@article{pmid19706884,
  author  = {Robins, H. S. and Campregher, P. V. and Srivastava, S. K.
             and Wacher, A. and Turtle, C. J. and Kahsai, O. and Riddell,
             S. R. and Warren, E. H. and Carlson, C. S.},
  title   = {Comprehensive assessment of {T}-cell receptor {$\beta$}-chain
             diversity in {$\alpha\beta$} {T} cells},
  journal = {Blood},
  year    = 2009,
  volume  = 114,
  number  = 19,
  pages   = {4099--4107},
  month   = nov,
}
|
|
|
|
|
|
|
|
|
|
% Preprint (bioRxiv): Nurk et al., "The complete sequence of a human genome", 2021.
% elocation-id is a non-standard field kept from the publisher export.
@article{Nurk2021.05.26.445798,
  author       = {Nurk, Sergey and Koren, Sergey and Rhie, Arang and
                  Rautiainen, Mikko and Bzikadze, Andrey V. and Mikheenko, Alla
                  and Vollger, Mitchell R. and Altemose, Nicolas and Uralsky,
                  Lev and Gershman, Ariel and Aganezov, Sergey and Hoyt,
                  Savannah J. and Diekhans, Mark and Logsdon, Glennis A. and
                  Alonge, Michael and Antonarakis, Stylianos E. and Borchers,
                  Matthew and Bouffard, Gerard G. and Brooks, Shelise Y. and
                  Caldas, Gina V. and Cheng, Haoyu and Chin, Chen-Shan and Chow,
                  William and de Lima, Leonardo G. and Dishuck, Philip C. and
                  Durbin, Richard and Dvorkina, Tatiana and Fiddes, Ian T. and
                  Formenti, Giulio and Fulton, Robert S. and Fungtammasan,
                  Arkarachai and Garrison, Erik and Grady, Patrick G. S. and
                  Graves-Lindsay, Tina A. and Hall, Ira M. and Hansen, Nancy F.
                  and Hartley, Gabrielle A. and Haukness, Marina and Howe,
                  Kerstin and Hunkapiller, Michael W. and Jain, Chirag and Jain,
                  Miten and Jarvis, Erich D. and Kerpedjiev, Peter and Kirsche,
                  Melanie and Kolmogorov, Mikhail and Korlach, Jonas and
                  Kremitzki, Milinn and Li, Heng and Maduro, Valerie V. and
                  Marschall, Tobias and McCartney, Ann M. and McDaniel, Jennifer
                  and Miller, Danny E. and Mullikin, James C. and Myers, Eugene
                  W. and Olson, Nathan D. and Paten, Benedict and Peluso, Paul
                  and Pevzner, Pavel A. and Porubsky, David and Potapova, Tamara
                  and Rogaev, Evgeny I. and Rosenfeld, Jeffrey A. and Salzberg,
                  Steven L. and Schneider, Valerie A. and Sedlazeck, Fritz J.
                  and Shafin, Kishwar and Shew, Colin J. and Shumate, Alaina and
                  Sims, Yumi and Smit, Arian F. A. and Soto, Daniela C. and
                  Sovi{\'c}, Ivan and Storer, Jessica M. and Streets, Aaron and
                  Sullivan, Beth A. and Thibaud-Nissen, Fran{\c{c}}oise and
                  Torrance, James and Wagner, Justin and Walenz, Brian P. and
                  Wenger, Aaron and Wood, Jonathan M. D. and Xiao, Chunlin and
                  Yan, Stephanie M. and Young, Alice C. and Zarate, Samantha and
                  Surti, Urvashi and McCoy, Rajiv C. and Dennis, Megan Y. and
                  Alexandrov, Ivan A. and Gerton, Jennifer L. and
                  O'Neill, Rachel J. and Timp, Winston and Zook,
                  Justin M. and Schatz, Michael C. and Eichler, Evan E. and
                  Miga, Karen H. and Phillippy, Adam M.},
  title        = {The complete sequence of a human genome},
  elocation-id = {2021.05.26.445798},
  year         = 2021,
  doi          = {10.1101/2021.05.26.445798},
  publisher    = {Cold Spring Harbor Laboratory},
  abstract     = {In 2001, Celera Genomics and the International Human Genome
                  Sequencing Consortium published their initial drafts of the
                  human genome, which revolutionized the field of genomics.
                  While these drafts and the updates that followed effectively
                  covered the euchromatic fraction of the genome, the
                  heterochromatin and many other complex regions were left
                  unfinished or erroneous. Addressing this remaining 8\% of the
                  genome, the Telomere-to-Telomere (T2T) Consortium has finished
                  the first truly complete 3.055 billion base pair (bp) sequence
                  of a human genome, representing the largest improvement to the
                  human reference genome since its initial release. The new
                  T2T-CHM13 reference includes gapless assemblies for all 22
                  autosomes plus Chromosome X, corrects numerous errors, and
                  introduces nearly 200 million bp of novel sequence containing
                  2,226 paralogous gene copies, 115 of which are predicted to be
                  protein coding. The newly completed regions include all
                  centromeric satellite arrays and the short arms of all five
                  acrocentric chromosomes, unlocking these complex regions of
                  the genome to variational and functional studies for the first
                  time. Competing Interest Statement: AF and CSC are employees of
                  DNAnexus; IS, JK, MWH, PP, and AW are employees of Pacific
                  Biosciences; FJS has received travel funds to speak at events
                  hosted by Pacific Biosciences; SK and FJS have received travel
                  funds to speak at events hosted by Oxford Nanopore
                  Technologies. WT has licensed two patents to Oxford Nanopore
                  Technologies (US 8748091 and 8394584).},
  url          = {https://www.biorxiv.org/content/early/2021/05/27/2021.05.26.445798},
  eprint       = {https://www.biorxiv.org/content/early/2021/05/27/2021.05.26.445798.full.pdf},
  journal      = {bioRxiv},
}
|
|
|
|
|
|
|
|
|
|
% Journal article: Wang et al., NanoReviser, Frontiers in Genetics 11, 2020.
% The pages field holds the article number (900), as exported.
@article{10.3389/fgene.2020.00900,
  author   = {Wang, Luotong and Qu, Li and Yang, Longshu and Wang, Yiying
              and Zhu, Huaiqiu},
  title    = {{NanoReviser}: An Error-Correction Tool for Nanopore
              Sequencing Based on a Deep Learning Algorithm},
  journal  = {Frontiers in Genetics},
  volume   = 11,
  pages    = 900,
  year     = 2020,
  url      = {https://www.frontiersin.org/article/10.3389/fgene.2020.00900},
  doi      = {10.3389/fgene.2020.00900},
  issn     = {1664-8021},
  abstract = {Nanopore sequencing is regarded as one of the most
              promising third-generation sequencing (TGS) technologies.
              Since 2014, Oxford Nanopore Technologies (ONT) has developed a
              series of devices based on nanopore sequencing to produce very
              long reads, with an expected impact on genomics. However, the
              nanopore sequencing reads are susceptible to a fairly high
              error rate owing to the difficulty in identifying the DNA
              bases from the complex electrical signals. Although several
              basecalling tools have been developed for nanopore sequencing
              over the past years, it is still challenging to correct the
              sequences after applying the basecalling procedure. In this
              study, we developed an open-source DNA basecalling reviser,
              NanoReviser, based on a deep learning algorithm to correct the
              basecalling errors introduced by current basecallers provided
              by default. In our module, we re-segmented the raw electrical
              signals based on the basecalled sequences provided by the
              default basecallers. By employing convolution neural networks
              (CNNs) and bidirectional long short-term memory (Bi-LSTM)
              networks, we took advantage of the information from the raw
              electrical signals and the basecalled sequences from the
              basecallers. Our results showed NanoReviser, as a
              post-basecalling reviser, significantly improving the
              basecalling quality. After being trained on standard ONT
              sequencing reads from public E. coli and human NA12878
              datasets, NanoReviser reduced the sequencing error rate by
              over 5\% for both the E. coli dataset and the human dataset.
              The performance of NanoReviser was found to be better than
              those of all current basecalling tools. Furthermore, we
              analyzed the modified bases of the E. coli dataset and added
              the methylation information to train our module. With the
              methylation annotation, NanoReviser reduced the error rate by
              7\% for the E. coli dataset and specifically reduced the error
              rate by over 10\% for the regions of the sequence rich in
              methylated bases. To the best of our knowledge, NanoReviser is
              the first post-processing tool after basecalling to accurately
              correct the nanopore sequences without the time-consuming
              procedure of building the consensus sequence. The NanoReviser
              package is freely available at
              https://github.com/pkubioinformatics/NanoReviser.},
}
|
|
|
|
|
|
|
|
|
|
% Journal article: Heather and Chain, Genomics 107(1), 2016.
% The doi field stores the bare DOI; the resolver prefix belongs in url only.
@article{HEATHER20161,
  author   = {Heather, James M. and Chain, Benjamin},
  title    = {The sequence of sequencers: The history of sequencing {DNA}},
  journal  = {Genomics},
  volume   = 107,
  number   = 1,
  pages    = {1--8},
  year     = 2016,
  issn     = {0888-7543},
  doi      = {10.1016/j.ygeno.2015.11.003},
  url      = {https://www.sciencedirect.com/science/article/pii/S0888754315300410},
  keywords = {DNA, RNA, Sequencing, Sequencer, History},
  abstract = {Determining the order of nucleic acid residues in
              biological samples is an integral component of a wide variety
              of research applications. Over the last fifty years large
              numbers of researchers have applied themselves to the
              production of techniques and technologies to facilitate this
              feat, sequencing DNA and RNA molecules. This time-scale has
              witnessed tremendous changes, moving from sequencing short
              oligonucleotides to millions of bases, from struggling towards
              the deduction of the coding sequence of a single gene to rapid
              and widely available whole genome sequencing. This article
              traverses those years, iterating through the different
              generations of sequencing technology, highlighting some of the
              key discoveries, researchers, and sequences along the way.},
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
% Journal article: van Dijk et al., Trends in Genetics 30(9), 2014.
% day is a non-standard field kept from the publisher export.
@article{vanDijk2014,
  author    = {van Dijk, Erwin L. and Auger, H{\'e}l{\`e}ne and
               Jaszczyszyn, Yan and Thermes, Claude},
  title     = {Ten years of next-generation sequencing technology},
  journal   = {Trends in Genetics},
  year      = 2014,
  month     = sep,
  day       = 01,
  publisher = {Elsevier},
  volume    = 30,
  number    = 9,
  pages     = {418--426},
  issn      = {0168-9525},
  doi       = {10.1016/j.tig.2014.07.001},
  url       = {https://doi.org/10.1016/j.tig.2014.07.001},
}
|
2021-06-28 01:56:27 +02:00
|
|
|
|
|
|
|
|
|
% Journal article: Sanger, Nicklen and Coulson, PNAS 74(12), 1977.
@article{Sanger5463,
  author    = {Sanger, F. and Nicklen, S. and Coulson, A. R.},
  title     = {{DNA} sequencing with chain-terminating inhibitors},
  volume    = 74,
  number    = 12,
  pages     = {5463--5467},
  year      = 1977,
  doi       = {10.1073/pnas.74.12.5463},
  publisher = {National Academy of Sciences},
  abstract  = {A new method for determining nucleotide sequences in DNA is
               described. It is similar to the ``plus and
               minus'' method [Sanger, F. \& Coulson,
               A. R. (1975) J. Mol. Biol. 94, 441--448] but makes use of the
               2',3'-dideoxy and arabinonucleoside analogues of the normal
               deoxynucleoside triphosphates, which act as specific
               chain-terminating inhibitors of DNA polymerase. The technique
               has been applied to the DNA of bacteriophage $\phi$X174 and is more
               rapid and more accurate than either the plus or the minus
               method.},
  issn      = {0027-8424},
  url       = {https://www.pnas.org/content/74/12/5463},
  eprint    = {https://www.pnas.org/content/74/12/5463.full.pdf},
  journal   = {Proceedings of the National Academy of Sciences},
}
|
2021-06-28 19:01:25 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
% Journal article: International Human Genome Sequencing Consortium, Nature 431(7011), 2004.
% The corporate author is double-braced so it is treated as one indivisible name.
@article{InternationalHumanGenomeSequencingConsortium2004,
  author   = {{International Human Genome Sequencing Consortium}},
  title    = {Finishing the euchromatic sequence of the human genome},
  journal  = {Nature},
  year     = 2004,
  month    = oct,
  day      = 01,
  volume   = 431,
  number   = 7011,
  pages    = {931--945},
  abstract = {The sequence of the human genome encodes the genetic
              instructions for human physiology, as well as rich information
              about human evolution. In 2001, the International Human Genome
              Sequencing Consortium reported a draft sequence of the
              euchromatic portion of the human genome. Since then, the
              international collaboration has worked to convert this draft
              into a genome sequence with high accuracy and nearly complete
              coverage. Here, we report the result of this finishing
              process. The current genome sequence (Build 35) contains 2.85
              billion nucleotides interrupted by only 341 gaps. It covers
              $\sim$99{\%} of the euchromatic genome and is accurate to an error
              rate of $\sim$1 event per 100,000 bases. Many of the remaining
              euchromatic gaps are associated with segmental duplications
              and will require focused work with new methods. The
              near-complete sequence, the first for a vertebrate, greatly
              improves the precision of biological analyses of the human
              genome including studies of gene number, birth and death.
              Notably, the human genome seems to encode only 20,000--25,000
              protein-coding genes. The genome sequence reported here should
              serve as a firm foundation for biomedical research in the
              decades ahead.},
  issn     = {1476-4687},
  doi      = {10.1038/nature03001},
  url      = {https://doi.org/10.1038/nature03001},
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
% Journal article: Schloss, Nature Biotechnology 26(10), 2008.
@article{Schloss2008,
  author   = {Schloss, Jeffery A.},
  title    = {How to get genomes at one ten-thousandth the cost},
  journal  = {Nature Biotechnology},
  year     = 2008,
  month    = oct,
  day      = 01,
  volume   = 26,
  number   = 10,
  pages    = {1113--1115},
  abstract = {The NHGRI's Advanced DNA Sequencing Technology program is
              spearheading the development of platforms that will bring
              routine whole-genome sequencing closer to reality.},
  issn     = {1546-1696},
  doi      = {10.1038/nbt1008-1113},
  url      = {https://doi.org/10.1038/nbt1008-1113},
}
|
2021-06-29 02:44:36 +02:00
|
|
|
|
|
|
|
|
|
% Journal article: Shugay et al., Nature Methods 11(6), 2014.
@article{Shugay2014,
  author   = {Shugay, Mikhail and Britanova, Olga V. and Merzlyak,
              Ekaterina M. and Turchaninova, Maria A. and Mamedov, Ilgar Z.
              and Tuganbaev, Timur R. and Bolotin, Dmitriy A. and
              Staroverov, Dmitry B. and Putintseva, Ekaterina V. and
              Plevova, Karla and Linnemann, Carsten and Shagin, Dmitriy and
              Pospisilova, Sarka and Lukyanov, Sergey and Schumacher, Ton N.
              and Chudakov, Dmitriy M.},
  title    = {Towards error-free profiling of immune repertoires},
  journal  = {Nature Methods},
  year     = 2014,
  month    = jun,
  day      = 01,
  volume   = 11,
  number   = 6,
  pages    = {653--655},
  abstract = {A two-step error correction process for high
              throughput--sequenced T- and B-cell receptors allows the
              elimination of most errors while not diminishing the natural
              complexity of the repertoires.},
  issn     = {1548-7105},
  doi      = {10.1038/nmeth.2960},
  url      = {https://doi.org/10.1038/nmeth.2960},
}
|
|
|
|
|
|
|
|
|
|
% Journal article: Ma et al., Genome Biology 20(1), 2019.
% The pages field holds the article number (50), as exported.
@article{Ma2019,
  author   = {Ma, Xiaotu and Shao, Ying and Tian, Liqing and Flasch,
              Diane A. and Mulder, Heather L. and Edmonson, Michael N. and
              Liu, Yu and Chen, Xiang and Newman, Scott and Nakitandwe, Joy
              and Li, Yongjin and Li, Benshang and Shen, Shuhong and Wang,
              Zhaoming and Shurtleff, Sheila and Robison, Leslie L. and
              Levy, Shawn and Easton, John and Zhang, Jinghui},
  title    = {Analysis of error profiles in deep next-generation
              sequencing data},
  journal  = {Genome Biology},
  year     = 2019,
  month    = mar,
  day      = 14,
  volume   = 20,
  number   = 1,
  pages    = 50,
  abstract = {Sequencing errors are key confounding factors for detecting
              low-frequency genetic variants that are important for cancer
              molecular diagnosis, treatment, and surveillance using deep
              next-generation sequencing (NGS). However, there is a lack of
              comprehensive understanding of errors introduced at various
              steps of a conventional NGS workflow, such as sample handling,
              library preparation, PCR enrichment, and sequencing. In this
              study, we use current NGS technology to systematically
              investigate these questions.},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-019-1659-6},
}
|
|
|
|
|
|
|
|
|
|
% Master's thesis, Universidad de Granada, 2019.
% The citation key contains a non-ASCII character; it is left unchanged so existing
% citations keep resolving. NOTE(review): classic BibTeX toolchains may mishandle it.
% The author is written in "Last, First" form so the double surname stays together.
@mastersthesis{BenítezCantos-Master,
  author = {Ben{\'\i}tez Cantos, Mar{\'\i}a Soledad},
  title  = {An{\'a}lisis de repertorios de receptores de c{\'e}lulas {T} a partir de datos de secuenciaci{\'o}n masiva},
  school = {Universidad de Granada},
  year   = {2019},
  month  = jul,
}
|
|
|
|
|
|
|
|
|
|
% Book page reference: Abbas, Lichtman and Pillai, 9th edition, Elsevier, 2017.
% title is supplied because classic BibTeX requires it for this entry type; booktitle is
% kept from the export. NOTE(review): under biblatex both are printed; drop one if the
% book title then appears twice.
@inbook{abbas_lichtman_pillai_2017,
  author    = {Abbas, Abul K. and Lichtman, Andrew H. and Pillai, Shiv},
  title     = {Cellular and Molecular Immunology},
  booktitle = {Cellular and Molecular Immunology},
  edition   = {9th},
  publisher = {Elsevier},
  address   = {Philadelphia, PA},
  year      = 2017,
  pages     = 204,
}
|
2021-06-29 20:00:09 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
% Journal article: Crick, Nature 227(5258), 1970.
@article{CRICK1970,
  author   = {Crick, Francis},
  title    = {Central Dogma of Molecular Biology},
  journal  = {Nature},
  year     = 1970,
  month    = aug,
  day      = 01,
  volume   = 227,
  number   = 5258,
  pages    = {561--563},
  abstract = {The central dogma of molecular biology deals with the
              detailed residue-by-residue transfer of sequential
              information. It states that such information cannot be
              transferred from protein to either protein or nucleic acid.},
  issn     = {1476-4687},
  doi      = {10.1038/227561a0},
  url      = {https://doi.org/10.1038/227561a0},
}
|
|
|
|
|
|
|
|
|
|
% Journal article: Salk, Schmitt and Loeb, Nature Reviews Genetics 19(5), 2018.
@article{Salk2018,
  author   = {Salk, Jesse J. and Schmitt, Michael W. and Loeb, Lawrence
              A.},
  title    = {Enhancing the accuracy of next-generation sequencing for
              detecting rare and subclonal mutations},
  journal  = {Nature Reviews Genetics},
  year     = 2018,
  month    = may,
  day      = 01,
  volume   = 19,
  number   = 5,
  pages    = {269--285},
  abstract = {The ability to identify low-frequency genetic variants
              among heterogeneous populations of cells or DNA molecules is
              important in many fields of basic science, clinical medicine
              and other applications, yet current high-throughput DNA
              sequencing technologies have an error rate between 1 per 100
              and 1 per 1,000 base pairs sequenced, which obscures their
              presence below this level. As next-generation sequencing
              technologies evolved over the decade, throughput has improved
              markedly, but raw accuracy has remained generally unchanged.
              Researchers with a need for high accuracy developed data
              filtering methods and incremental biochemical improvements
              that modestly improve low-frequency variant detection, but
              background errors remain limiting in many fields. The most
              profoundly impactful means for reducing errors, first
              developed approximately 7 years ago, has been the concept of
              single-molecule consensus sequencing. This entails redundant
              sequencing of multiple copies of a given specific DNA molecule
              and discounting of variants that are not present in all or
              most of the copies as likely errors. Consensus sequencing can
              be achieved by labelling each molecule with a unique molecular
              barcode before generating copies, which allows subsequent
              comparison of these copies or schemes whereby copies are
              physically joined and sequenced together. Because of
              trade-offs in cost, time and accuracy, no single method is
              optimal for every application, and each method should be
              considered on a case-by-case basis. Major applications for
              high-accuracy DNA sequencing include non-invasive cancer
              diagnostics, cancer screening, early detection of cancer
              relapse or impending drug resistance, infectious disease
              applications, prenatal diagnostics, forensics and mutagenesis
              assessment. Future advances in ultra-high-accuracy sequencing
              are likely to be driven by an emerging generation of
              single-molecule sequencers, particularly those that allow
              independent sequence comparison of both strands of native DNA
              duplexes.},
  issn     = {1471-0064},
  doi      = {10.1038/nrg.2017.117},
  url      = {https://doi.org/10.1038/nrg.2017.117},
}
|
|
|
|
|
|
|
|
|
|
% Book page reference: Lehninger, Nelson and Cox, 5th edition, W. H. Freeman, 2008.
% Authors are separated with "and"; comma-separated names are not parsed as a list.
@book{book:lehninger,
  author    = {Lehninger, Albert and Nelson, David L. and Cox, Michael M.},
  title     = {{Lehninger} Principles of Biochemistry},
  publisher = {W. H. Freeman},
  isbn      = {9781429224161, 1429224169},
  year      = 2008,
  edition   = {5th},
  pages     = 276,
}
|
|
|
|
|
|
|
|
|
|
% Symposium paper: Crick, "On protein synthesis", 1958, volume 12.
% NOTE(review): the export stored the page range in number ("138-63") with pages = 8;
% it is read here as pages 138--163 -- confirm against the printed volume.
@inproceedings{crick1958protein,
  author    = {Crick, Francis H. C.},
  title     = {On protein synthesis},
  booktitle = {Symposia of the Society for Experimental Biology},
  volume    = 12,
  pages     = {138--163},
  year      = 1958,
}
|
2021-06-30 00:48:42 +02:00
|
|
|
|
|
|
|
|
|
% Journal article: Lee, Bioinformatics 19(8), 2003.
% The eprint field holds the publisher's PDF link exactly as exported.
@article{10.1093/bioinformatics/btg109,
  author   = {Lee, Christopher},
  title    = {Generating consensus sequences from partial order multiple
              sequence alignment graphs},
  journal  = {Bioinformatics},
  volume   = 19,
  number   = 8,
  pages    = {999--1008},
  year     = 2003,
  month    = may,
  abstract = {Motivation: Consensus sequence generation is important in
              many kinds of sequence analysis ranging from sequence assembly
              to profile-based iterative search methods. However, how can a
              consensus be constructed when its inherent assumption---that the
              aligned sequences form a single linear consensus---is not
              true? Results: Partial Order Alignment (POA) enables
              construction and analysis of multiple sequence alignments as
              directed acyclic graphs containing complex branching
              structure. Here we present a dynamic programming algorithm
              (heaviest\_bundle) for generating multiple consensus sequences
              from such complex alignments. The number and relationships of
              these consensus sequences reveals the degree of structural
              complexity of the source alignment. This is a powerful and
              general approach for analyzing and visualizing complex
              alignment structures, and can be applied to any alignment. We
              illustrate its value for analyzing expressed sequence
              alignments to detect alternative splicing, reconstruct full
              length mRNA isoform sequences from EST fragments, and separate
              paralog mixtures that can cause incorrect SNP
              predictions. Availability: The heaviest\_bundle source code is
              available at http://www.bioinformatics.ucla.edu/poa. Contact:
              leec@mbi.ucla.edu. *To whom correspondence should be
              addressed.},
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btg109},
  url      = {https://doi.org/10.1093/bioinformatics/btg109},
  eprint   = {https://academic.oup.com/bioinformatics/article-pdf/19/8/999/642375/btg109.pdf},
}
|
2021-06-30 01:45:45 +02:00
|
|
|
|
|
|
|
|
|
% Journal article: Nagar and Hahsler, BMC Bioinformatics 14(11), 2013.
% The pages field holds the supplement article identifier (S2), as exported.
@article{Nagar2013,
  author   = {Nagar, Anurag and Hahsler, Michael},
  title    = {Fast discovery and visualization of conserved regions in
              {DNA} sequences using quasi-alignment},
  journal  = {BMC Bioinformatics},
  year     = 2013,
  month    = sep,
  day      = 13,
  volume   = 14,
  number   = 11,
  pages    = {S2},
  abstract = {Next Generation Sequencing techniques are producing
              enormous amounts of biological sequence data and analysis
              becomes a major computational problem. Currently, most
              analysis, especially the identification of conserved regions,
              relies heavily on Multiple Sequence Alignment and its various
              heuristics such as progressive alignment, whose run time grows
              with the square of the number and the length of the aligned
              sequences and requires significant computational resources. In
              this work, we present a method to efficiently discover regions
              of high similarity across multiple sequences without
              performing expensive sequence alignment. The method is based
              on approximating edit distance between segments of sequences
              using p-mer frequency counts. Then, efficient high-throughput
              data stream clustering is used to group highly similar
              segments into so called quasi-alignments. Quasi-alignments
              have numerous applications such as identifying species and
              their taxonomic class from sequences, comparing sequences for
              similarities, and, as in this paper, discovering conserved
              regions across related sequences.},
  issn     = {1471-2105},
  doi      = {10.1186/1471-2105-14-S11-S2},
  url      = {https://doi.org/10.1186/1471-2105-14-S11-S2},
}
|
|
|
|
|
|
|
|
|
|
% Book page reference: Russell and Norvig, 3rd edition, Prentice Hall, 2010.
% Authors are separated with "and"; comma-separated names are not parsed as a list.
@book{book:771224,
  author    = {Russell, Stuart and Norvig, Peter},
  title     = {Artificial Intelligence: A Modern Approach},
  publisher = {Prentice Hall},
  isbn      = {0136042597, 9780136042594},
  year      = 2010,
  series    = {Prentice Hall Series in Artificial Intelligence},
  edition   = {3rd},
  pages     = {38--45, 48--49, 55--56},
}
|
|
|
|
|
|
|
|
|
|
% Journal article: McCarthy, Minsky, Rochester and Shannon, AI Magazine 27(4), 2006.
% The exporter's abstractNote field is stored under the conventional name abstract.
% NOTE(review): pages holds only the value 12 from the export; confirm the full range.
@article{McCarthy_Minsky_Rochester_Shannon_2006,
  author   = {McCarthy, John and Minsky, Marvin L. and Rochester,
              Nathaniel and Shannon, Claude E.},
  title    = {A Proposal for the {Dartmouth} Summer Research Project on
              Artificial Intelligence, {August} 31, 1955},
  journal  = {AI Magazine},
  volume   = 27,
  number   = 4,
  pages    = 12,
  year     = 2006,
  month    = dec,
  doi      = {10.1609/aimag.v27i4.1904},
  url      = {https://ojs.aaai.org/index.php/aimagazine/article/view/1904},
  abstract = {The 1956 Dartmouth summer research project on artificial
              intelligence was initiated by this August 31, 1955 proposal,
              authored by John McCarthy, Marvin Minsky, Nathaniel Rochester,
              and Claude Shannon. The original typescript consisted of 17
              pages plus a title page. Copies of the typescript are housed
              in the archives at Dartmouth College and Stanford University.
              The first 5 papers state the proposal, and the remaining pages
              give qualifications and interests of the four who proposed the
              study. In the interest of brevity, this article reproduces
              only the proposal itself, along with the short
              autobiographical statements of the proposers.},
}
|
|
|
|
|
|
|
|
|
|
% Book page reference: Engelbrecht, 2nd edition, Wiley, 2007.
@book{book:80129,
  author    = {Engelbrecht, Andries P.},
  title     = {Computational Intelligence: An Introduction},
  publisher = {Wiley},
  isbn      = {9780470035610, 0470035617},
  year      = 2007,
  edition   = {2nd},
  pages     = {39--40},
}
|
2021-07-01 04:18:48 +02:00
|
|
|
|
|
|
|
|
|
% Chapter in an edited book: Zou, Han and So, in Livingstone (ed.), Humana Press, 2009.
% The entry has its own authors plus a volume editor, so it is typed as incollection.
@incollection{Zou2009,
  author    = {Zou, Jinming and Han, Yi and So, Sung-Sau},
  editor    = {Livingstone, David J.},
  title     = {Overview of Artificial Neural Networks},
  booktitle = {Artificial Neural Networks: Methods and Applications},
  year      = 2009,
  publisher = {Humana Press},
  address   = {Totowa, NJ},
  pages     = {14--22},
  abstract  = {The artificial neural network (ANN), or simply neural
               network, is a machine learning method evolved from the idea of
               simulating the human brain. The data explosion in modern drug
               discovery research requires sophisticated analysis methods to
               uncover the hidden causal relationships between single or
               multiple responses and a large set of properties. The ANN is
               one of many versatile tools to meet the demand in drug
               discovery modeling. Compared to a traditional regression
               approach, the ANN is capable of modeling complex nonlinear
               relationships. The ANN also has excellent fault tolerance and
               is fast and highly scalable with parallel processing. This
               chapter introduces the background of ANN development and
               outlines the basic concepts crucially important for
               understanding more sophisticated ANN. Several commonly used
               learning methods and network setups are discussed briefly at
               the end of the chapter.},
  isbn      = {978-1-60327-101-1},
  doi       = {10.1007/978-1-60327-101-1_2},
  url       = {https://doi.org/10.1007/978-1-60327-101-1_2},
}
|
|
|
|
|
|
|
|
|
|
% Book page reference: Graupe, 3rd edition, 2013.
% NOTE(review): the publisher was exported as "World Scientific Publ"; the expanded form
% is an assumption to confirm.
@book{book:2610592,
  author    = {Graupe, Daniel},
  title     = {Principles of Artificial Neural Networks},
  publisher = {World Scientific Publishing},
  isbn      = {9789814522731, 9814522732},
  year      = 2013,
  edition   = {3rd},
  pages     = {28--31},
}
|
|
|
|
|
|
|
|
|
|
% Journal article: Ciresan et al., Neural Computation 22(12), 2010.
% The citation key contains a non-ASCII character; it is left unchanged so existing
% citations keep resolving. NOTE(review): classic BibTeX toolchains may mishandle it.
@article{Cireşan2010,
  author   = {Cire{\c{s}}an, Dan Claudiu and Meier, Ueli and Gambardella,
              Luca Maria and Schmidhuber, J{\"u}rgen},
  title    = {Deep, Big, Simple Neural Nets for Handwritten Digit
              Recognition},
  journal  = {Neural Computation},
  year     = 2010,
  month    = dec,
  day      = 01,
  volume   = 22,
  number   = 12,
  pages    = {3207--3220},
  abstract = {Good old online backpropagation for plain multilayer
              perceptrons yields a very low 0.35{\%} error rate on the MNIST
              handwritten digits benchmark. All we need to achieve this best
              result so far are many hidden layers, many neurons per layer,
              numerous deformed training images to avoid overfitting, and
              graphics cards to greatly speed up learning.},
  issn     = {0899-7667},
  doi      = {10.1162/NECO_a_00052},
  url      = {https://doi.org/10.1162/NECO_a_00052},
}
|