#+PROPERTY: header-args :exports none :tangle "./bibliography.bib"
#+LATEX_CLASS_OPTIONS: [12pt]
#+LATEX_HEADER: \usepackage[natbib=true]{biblatex} \DeclareFieldFormat{apacase}{#1} \addbibresource{./bibliography.bib}
#+LATEX_HEADER: \usepackage{parskip}
#+OPTIONS: <:nil c:nil todo:nil H:5
#+auto_tangle: t
* Deep Learning
** Attention is All You Need
#+begin_src bibtex
@article{https://doi.org/10.48550/arxiv.1706.03762,
doi = {10.48550/ARXIV.1706.03762},
url = {https://arxiv.org/abs/1706.03762},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and
Kaiser, Lukasz and Polosukhin, Illia},
keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG),
FOS: Computer and information sciences, FOS: Computer and
information sciences},
title = {Attention Is All You Need},
publisher = {arXiv},
year = 2017,
copyright = {arXiv.org perpetual, non-exclusive license}
}
#+end_src
#+LaTeX: \printbibliography[heading=none]
** Axial Attention in Multidimensional Transformers
#+begin_src bibtex
@article{https://doi.org/10.48550/arxiv.1912.12180,
doi = {10.48550/ARXIV.1912.12180},
url = {https://arxiv.org/abs/1912.12180},
author = {Ho, Jonathan and Kalchbrenner, Nal and Weissenborn, Dirk
and Salimans, Tim},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS:
Computer and information sciences, FOS: Computer and
information sciences},
title = {Axial Attention in Multidimensional Transformers},
publisher = {arXiv},
year = 2019,
copyright = {arXiv.org perpetual, non-exclusive license}
}
#+end_src
** Longformer: The Long-Document Transformer
#+begin_src bibtex
@article{https://doi.org/10.48550/arxiv.2004.05150,
doi = {10.48550/ARXIV.2004.05150},
url = {https://arxiv.org/abs/2004.05150},
author = {Beltagy, Iz and Peters, Matthew E. and Cohan, Arman},
keywords = {Computation and Language (cs.CL), FOS: Computer and
information sciences, FOS: Computer and information sciences},
title = {Longformer: The Long-Document Transformer},
publisher = {arXiv},
year = 2020,
copyright = {arXiv.org perpetual, non-exclusive license}
}
#+end_src
** Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context
#+begin_src bibtex
@article{https://doi.org/10.48550/arxiv.1901.02860,
doi = {10.48550/ARXIV.1901.02860},
url = {https://arxiv.org/abs/1901.02860},
author = {Dai, Zihang and Yang, Zhilin and Yang, Yiming and
Carbonell, Jaime and Le, Quoc V. and Salakhutdinov, Ruslan},
keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL),
Machine Learning (stat.ML), FOS: Computer and information
sciences, FOS: Computer and information sciences},
title = {Transformer-XL: Attentive Language Models Beyond a
Fixed-Length Context},
publisher = {arXiv},
year = 2019,
copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0
International}
}
#+end_src
** BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
#+begin_src bibtex
@inproceedings{devlin-etal-2019-bert,
title = "{BERT}: Pre-training of Deep Bidirectional Transformers for
Language Understanding",
author = "Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and
Toutanova, Kristina",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican
Chapter of the Association for Computational Linguistics:
Human Language Technologies, Volume 1 (Long and Short Papers)",
month = jun,
year = 2019,
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-1423",
doi = "10.18653/v1/N19-1423",
pages = "4171--4186",
abstract = "We introduce a new language representation model called
BERT, which stands for Bidirectional Encoder Representations
from Transformers. Unlike recent language representation
models (Peters et al., 2018a; Radford et al., 2018), BERT is
designed to pre-train deep bidirectional representations from
unlabeled text by jointly conditioning on both left and right
context in all layers. As a result, the pre-trained BERT model
can be fine-tuned with just one additional output layer to
create state-of-the-art models for a wide range of tasks, such
as question answering and language inference, without
substantial task-specific architecture modifications. BERT is
conceptually simple and empirically powerful. It obtains new
state-of-the-art results on eleven natural language processing
tasks, including pushing the GLUE score to 80.5 (7.7 point
absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%}
absolute improvement), SQuAD v1.1 question answering Test F1
to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test
F1 to 83.1 (5.1 point absolute improvement).",
}
#+end_src
A masked language model (MLM) randomly masks some of the input tokens, and the training objective is to predict the original identity of each masked token from its surrounding context alone.
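As a quick illustration of this objective, the sketch below corrupts a token sequence the way BERT's pre-training does: roughly 15% of positions become prediction targets, and of those, 80% are replaced by a mask token, 10% by a random token, and 10% are left unchanged. It is a minimal, illustrative sketch only, not the reference implementation; the function name =mask_tokens= and the toy vocabulary are made up here, and =:tangle no= keeps the snippet out of the tangled =bibliography.bib=.
#+begin_src python :tangle no
import random

MASK = "[MASK]"
TOY_VOCAB = ["the", "cat", "sat", "on", "mat"]  # stand-in vocabulary for random replacements

def mask_tokens(tokens, mask_prob=0.15):
    """BERT-style corruption: select ~15% of positions as prediction targets;
    of those, 80% become [MASK], 10% a random token, 10% stay unchanged."""
    inputs, targets = list(tokens), [None] * len(tokens)
    for i, tok in enumerate(tokens):
        if random.random() < mask_prob:
            targets[i] = tok  # the model must recover this token from context
            r = random.random()
            if r < 0.8:
                inputs[i] = MASK
            elif r < 0.9:
                inputs[i] = random.choice(TOY_VOCAB)  # random replacement
            # else: keep the original token but still predict it
    return inputs, targets

corrupted, labels = mask_tokens("the cat sat on the mat".split())
print(corrupted)
print(labels)
#+end_src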
** Fast Transformers with Clustered Attention
#+begin_src bibtex
@article{https://doi.org/10.48550/arxiv.2007.04825,
doi = {10.48550/ARXIV.2007.04825},
url = {https://arxiv.org/abs/2007.04825},
author = {Vyas, Apoorv and Katharopoulos, Angelos and Fleuret,
François},
keywords = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS:
Computer and information sciences, FOS: Computer and
information sciences},
title = {Fast Transformers with Clustered Attention},
publisher = {arXiv},
year = 2020,
copyright = {arXiv.org perpetual, non-exclusive license}
}
#+end_src
** The elephant in the interpretability room: Why use attention as explanation when we have saliency methods?
#+begin_src bibtex
@inproceedings{bastings-filippova-2020-elephant,
title = "The elephant in the interpretability room: Why use
attention as explanation when we have saliency methods?",
author = "Bastings, Jasmijn and Filippova, Katja",
booktitle = "Proceedings of the Third BlackboxNLP Workshop on Analyzing
and Interpreting Neural Networks for NLP",
month = nov,
year = 2020,
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.blackboxnlp-1.14",
doi = "10.18653/v1/2020.blackboxnlp-1.14",
pages = "149--155",
abstract = "There is a recent surge of interest in using attention as
explanation of model predictions, with mixed evidence on
whether attention can be used as such. While attention
conveniently gives us one weight per input token and is easily
extracted, it is often unclear toward what goal it is used as
explanation. We find that often that goal, whether explicitly
stated or not, is to find out what input tokens are the most
relevant to a prediction, and that the implied user for the
explanation is a model developer. For this goal and user, we
argue that input saliency methods are better suited, and that
there are no compelling reasons to use attention, despite the
coincidence that it provides a weight for each input. With
this position paper, we hope to shift some of the recent focus
on attention to saliency methods, and for authors to clearly
state the goal and user for their explanations.",
}
#+end_src
** MultiMAE: Multi-modal Multi-task Masked Autoencoders
#+begin_src bibtex
@article{https://doi.org/10.48550/arxiv.2204.01678,
doi = {10.48550/ARXIV.2204.01678},
url = {https://arxiv.org/abs/2204.01678},
author = {Bachmann, Roman and Mizrahi, David and Atanov, Andrei and
Zamir, Amir},
keywords = {Computer Vision and Pattern Recognition (cs.CV), Machine
Learning (cs.LG), FOS: Computer and information sciences, FOS:
Computer and information sciences},
title = {MultiMAE: Multi-modal Multi-task Masked Autoencoders},
publisher = {arXiv},
year = 2022,
copyright = {arXiv.org perpetual, non-exclusive license}
}
#+end_src
* Deep Learning + Biology
** CpG Transformer for imputation of single-cell methylomes
#+begin_src bibtex
@article{10.1093/bioinformatics/btab746,
author = {De Waele, Gaetan and Clauwaert, Jim and Menschaert, Gerben
and Waegeman, Willem},
title = "{CpG Transformer for imputation of single-cell methylomes}",
journal = {Bioinformatics},
volume = 38,
number = 3,
pages = {597-603},
year = 2021,
month = oct,
abstract = "{The adoption of current single-cell DNA methylation
sequencing protocols is hindered by incomplete coverage,
outlining the need for effective imputation techniques. The
task of imputing single-cell (methylation) data requires
models to build an understanding of underlying biological
processes.We adapt the transformer neural network architecture
to operate on methylation matrices through combining axial
attention with sliding window self-attention. The obtained CpG
Transformer displays state-of-the-art performances on a wide
range of scBS-seq and scRRBS-seq datasets. Furthermore, we
demonstrate the interpretability of CpG Transformer and
illustrate its rapid transfer learning properties, allowing
practitioners to train models on new datasets with a limited
computational and time budget.CpG Transformer is freely
available at
https://github.com/gdewael/cpg-transformer.Supplementary data
are available at Bioinformatics online.}",
issn = {1367-4803},
doi = {10.1093/bioinformatics/btab746},
url = {https://doi.org/10.1093/bioinformatics/btab746},
eprint =
{https://academic.oup.com/bioinformatics/article-pdf/38/3/597/42167564/btab746.pdf},
}
#+end_src
** MSA Transformer
#+begin_src bibtex
@article {Rao2021.02.12.430858,
author = {Rao, Roshan and Liu, Jason and Verkuil, Robert and Meier,
Joshua and Canny, John F. and Abbeel, Pieter and Sercu, Tom
and Rives, Alexander},
title = {MSA Transformer},
elocation-id = {2021.02.12.430858},
year = 2021,
doi = {10.1101/2021.02.12.430858},
publisher = {Cold Spring Harbor Laboratory},
abstract = {Unsupervised protein language models trained across
millions of diverse sequences learn structure and function of
proteins. Protein language models studied to date have been
trained to perform inference from individual sequences. The
longstanding approach in computational biology has been to
make inferences from a family of evolutionarily related
sequences by fitting a model to each family independently. In
this work we combine the two paradigms. We introduce a protein
language model which takes as input a set of sequences in the
form of a multiple sequence alignment. The model interleaves
row and column attention across the input sequences and is
trained with a variant of the masked language modeling
objective across many protein families. The performance of the
model surpasses current state-of-the-art unsupervised
structure learning methods by a wide margin, with far greater
parameter efficiency than prior state-of-the-art protein
language models.Competing Interest StatementThe authors have
declared no competing interest.},
URL =
{https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858},
eprint =
{https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858.full.pdf},
journal = {bioRxiv}
}
#+end_src
** Highly accurate protein structure prediction with AlphaFold
#+begin_src bibtex
@article{Jumper2021,
author = {Jumper, John and Evans, Richard and Pritzel, Alexander and
Green, Tim and Figurnov, Michael and Ronneberger, Olaf and
Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'i}dek,
Augustin and Potapenko, Anna and Bridgland, Alex and Meyer,
Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and
Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov,
Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor
and Petersen, Stig and Reiman, David and Clancy, Ellen and
Zielinski, Michal and Steinegger, Martin and Pacholska,
Michalina and Berghammer, Tamas and Bodenstein, Sebastian and
Silver, David and Vinyals, Oriol and Senior, Andrew W. and
Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
title = {Highly accurate protein structure prediction with
AlphaFold},
journal = {Nature},
year = 2021,
month = {Aug},
day = 01,
volume = 596,
number = 7873,
pages = {583-589},
abstract = {Proteins are essential to life, and understanding their
structure can facilitate a mechanistic understanding of their
function. Through an enormous experimental effort1--4, the
structures of around 100,000 unique proteins have been
determined5, but this represents a small fraction of the
billions of known protein sequences6,7. Structural coverage is
bottlenecked by the months to years of painstaking effort
required to determine a single protein structure. Accurate
computational approaches are needed to address this gap and to
enable large-scale structural bioinformatics. Predicting the
three-dimensional structure that a protein will adopt based
solely on its amino acid sequence---the structure prediction
component of the `protein folding problem'8---has been an
important open research problem for more than 50 years9.
Despite recent progress10--14, existing methods fall far short
of atomic accuracy, especially when no homologous structure is
available. Here we provide the first computational method that
can regularly predict protein structures with atomic accuracy
even in cases in which no similar structure is known. We
validated an entirely redesigned version of our neural
network-based model, AlphaFold, in the challenging 14th
Critical Assessment of protein Structure Prediction
(CASP14)15, demonstrating accuracy competitive with
experimental structures in a majority of cases and greatly
outperforming other methods. Underpinning the latest version
of AlphaFold is a novel machine learning approach that
incorporates physical and biological knowledge about protein
structure, leveraging multi-sequence alignments, into the
design of the deep learning algorithm.},
issn = {1476-4687},
doi = {10.1038/s41586-021-03819-2},
url = {https://doi.org/10.1038/s41586-021-03819-2}
}
#+end_src
** MultiVI: deep generative model for the integration of multi-modal data
#+begin_src bibtex
@article {Ashuach2021.08.20.457057,
author = {Ashuach, Tal and Gabitto, Mariano I. and Jordan, Michael I.
and Yosef, Nir},
title = {MultiVI: deep generative model for the integration of
multi-modal data},
elocation-id = {2021.08.20.457057},
year = 2021,
doi = {10.1101/2021.08.20.457057},
publisher = {Cold Spring Harbor Laboratory},
abstract = {Jointly profiling the transcriptional and chromatin
accessibility landscapes of single-cells is a powerful
technique to characterize cellular populations. Here we
present MultiVI, a probabilistic model to analyze such
multiomic data and integrate it with single modality datasets.
MultiVI creates a joint representation that accurately
reflects both chromatin and transcriptional properties of the
cells even when one modality is missing. It also imputes
missing data, corrects for batch effects and is available in
the scvi-tools framework:
https://docs.scvi-tools.org/.Competing Interest StatementThe
authors have declared no competing interest.},
URL =
{https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057},
eprint =
{https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057.full.pdf},
journal = {bioRxiv}
}
#+end_src
* Biology
** Cobolt: integrative analysis of multimodal single-cell sequencing data
#+begin_src bibtex
@article{Gong2021,
author = {Gong, Boying and Zhou, Yun and Purdom, Elizabeth},
title = {Cobolt: integrative analysis of multimodal single-cell
sequencing data},
journal = {Genome Biology},
year = 2021,
month = {Dec},
day = 28,
volume = 22,
number = 1,
pages = 351,
abstract = {A growing number of single-cell sequencing platforms enable
joint profiling of multiple omics from the same cells. We
present Cobolt, a novel method that not only allows for
analyzing the data from joint-modality platforms, but provides
a coherent framework for the integration of multiple datasets
measured on different modalities. We demonstrate its
performance on multi-modality data of gene expression and
chromatin accessibility and illustrate the integration
abilities of Cobolt by jointly analyzing this multi-modality
data with single-cell RNA-seq and ATAC-seq datasets.},
issn = {1474-760X},
doi = {10.1186/s13059-021-02556-z},
url = {https://doi.org/10.1186/s13059-021-02556-z}
}
#+end_src
** MUON: multimodal omics analysis framework
#+begin_src bibtex
@article{Bredikhin2022,
author = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver},
title = {MUON: multimodal omics analysis framework},
journal = {Genome Biology},
year = 2022,
month = {Feb},
day = 01,
volume = 23,
number = 1,
pages = 42,
abstract = {Advances in multi-omics have led to an explosion of
multimodal datasets to address questions from basic biology to
translation. While these data provide novel opportunities for
discovery, they also pose management and analysis challenges,
thus motivating the development of tailored computational
solutions. Here, we present a data standard and an analysis
framework for multi-omics, MUON, designed to organise,
analyse, visualise, and exchange multimodal data. MUON stores
multimodal data in an efficient yet flexible and interoperable
data structure. MUON enables a versatile range of analyses,
from data preprocessing to flexible multi-omics alignment.},
issn = {1474-760X},
doi = {10.1186/s13059-021-02577-8},
url = {https://doi.org/10.1186/s13059-021-02577-8}
}
#+end_src
** Multimodal single cell data integration challenge: Results and lessons learned
#+begin_src bibtex
@inproceedings{pmlr-v176-lance22a,
title = {Multimodal single cell data integration challenge: Results
and lessons learned},
author = {Lance, Christopher and Luecken, Malte D. and Burkhardt,
Daniel B. and Cannoodt, Robrecht and Rautenstrauch, Pia and
Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and
Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh,
Nikolay and Ryazantsev, Gleb and Ohler, Uwe and data
integration competition participants, NeurIPS 2021 Multimodal
and Pisco, Angela Oliveira and Bloom, Jonathan and
Krishnaswamy, Smita and Theis, Fabian J.},
booktitle = {Proceedings of the NeurIPS 2021 Competitions and
Demonstrations Track},
pages = {162--176},
year = 2022,
editor = {Kiela, Douwe and Ciccone, Marco and Caputo, Barbara},
volume = 176,
series = {Proceedings of Machine Learning Research},
month = {06--14 Dec},
publisher = {PMLR},
pdf = {https://proceedings.mlr.press/v176/lance22a/lance22a.pdf},
url = {https://proceedings.mlr.press/v176/lance22a.html},
abstract = {Biology has become a data-intensive science. Recent
technological advances in single-cell genomics have enabled
the measurement of multiple facets of cellular state,
producing datasets with millions of single-cell observations.
While these data hold great promise for understanding
molecular mechanisms in health and disease, analysis
challenges arising from sparsity, technical and biological
variability, and high dimensionality of the data hinder the
derivation of such mechanistic insights. To promote the
innovation of algorithms for analysis of multimodal
single-cell data, we organized a competition at NeurIPS 2021
applying the Common Task Framework to multimodal single-cell
data integration. For this competition we generated the first
multimodal benchmarking dataset for single-cell biology and
defined three tasks in this domain: prediction of missing
modalities, aligning modalities, and learning a joint
representation across modalities. We further specified
evaluation metrics and developed a cloud-based algorithm
evaluation pipeline. Using this setup, 280 competitors
submitted over 2600 proposed solutions within a 3 month
period, showcasing substantial innovation especially in the
modality alignment task. Here, we present the results,
describe trends of well performing approaches, and discuss
challenges associated with running the competition.}
}
#+end_src