402 lines
23 KiB
BibTeX
402 lines
23 KiB
BibTeX
% Vaswani et al. (2017), "Attention Is All You Need"; arXiv preprint 1706.03762.
@article{https://doi.org/10.48550/arxiv.1706.03762,
  author        = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
                   Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and
                   Kaiser, Lukasz and Polosukhin, Illia},
  title         = {Attention Is All You Need},
  publisher     = {arXiv},
  year          = 2017,
  doi           = {10.48550/ARXIV.1706.03762},
  url           = {https://arxiv.org/abs/1706.03762},
  eprint        = {1706.03762},
  archiveprefix = {arXiv},
  keywords      = {Computation and Language (cs.CL), Machine Learning (cs.LG),
                   FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
||
|
||
% Ho et al. (2019), "Axial Attention in Multidimensional Transformers"; arXiv preprint 1912.12180.
@article{https://doi.org/10.48550/arxiv.1912.12180,
  author        = {Ho, Jonathan and Kalchbrenner, Nal and Weissenborn, Dirk
                   and Salimans, Tim},
  title         = {Axial Attention in Multidimensional Transformers},
  publisher     = {arXiv},
  year          = 2019,
  doi           = {10.48550/ARXIV.1912.12180},
  url           = {https://arxiv.org/abs/1912.12180},
  eprint        = {1912.12180},
  archiveprefix = {arXiv},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV),
                   FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
||
|
||
% Beltagy, Peters and Cohan (2020), "Longformer"; arXiv preprint 2004.05150.
@article{https://doi.org/10.48550/arxiv.2004.05150,
  author        = {Beltagy, Iz and Peters, Matthew E. and Cohan, Arman},
  title         = {Longformer: The Long-Document Transformer},
  publisher     = {arXiv},
  year          = 2020,
  doi           = {10.48550/ARXIV.2004.05150},
  url           = {https://arxiv.org/abs/2004.05150},
  eprint        = {2004.05150},
  archiveprefix = {arXiv},
  keywords      = {Computation and Language (cs.CL),
                   FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
||
|
||
% Dai et al. (2019), "Transformer-XL"; arXiv preprint 1901.02860.
% The model name is braced in the title so sentence-casing styles keep "XL" upper-case.
@article{https://doi.org/10.48550/arxiv.1901.02860,
  author        = {Dai, Zihang and Yang, Zhilin and Yang, Yiming and
                   Carbonell, Jaime and Le, Quoc V. and Salakhutdinov, Ruslan},
  title         = {{Transformer-XL}: Attentive Language Models Beyond a
                   Fixed-Length Context},
  publisher     = {arXiv},
  year          = 2019,
  doi           = {10.48550/ARXIV.1901.02860},
  url           = {https://arxiv.org/abs/1901.02860},
  eprint        = {1901.02860},
  archiveprefix = {arXiv},
  keywords      = {Machine Learning (cs.LG), Computation and Language (cs.CL),
                   Machine Learning (stat.ML),
                   FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution Non Commercial Share Alike 4.0
                   International},
}
|
||
|
||
% Devlin et al. (2019), BERT; NAACL-HLT 2019 long paper (ACL Anthology N19-1423).
@inproceedings{devlin-etal-2019-bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and
               Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for
               Language Understanding},
  booktitle = {Proceedings of the 2019 Conference of the North {American}
               Chapter of the Association for Computational Linguistics:
               Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = jun,
  year      = 2019,
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages     = {4171--4186},
  doi       = {10.18653/v1/N19-1423},
  url       = {https://aclanthology.org/N19-1423},
  abstract  = {We introduce a new language representation model called
               BERT, which stands for Bidirectional Encoder Representations
               from Transformers. Unlike recent language representation
               models (Peters et al., 2018a; Radford et al., 2018), BERT is
               designed to pre-train deep bidirectional representations from
               unlabeled text by jointly conditioning on both left and right
               context in all layers. As a result, the pre-trained BERT model
               can be fine-tuned with just one additional output layer to
               create state-of-the-art models for a wide range of tasks, such
               as question answering and language inference, without
               substantial task-specific architecture modifications. BERT is
               conceptually simple and empirically powerful. It obtains new
               state-of-the-art results on eleven natural language processing
               tasks, including pushing the GLUE score to 80.5 (7.7 point
               absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%}
               absolute improvement), SQuAD v1.1 question answering Test F1
               to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test
               F1 to 83.1 (5.1 point absolute improvement).},
}
|
||
|
||
% Vyas, Katharopoulos and Fleuret (2020), clustered attention; arXiv preprint 2007.04825.
% The cedilla is written as a LaTeX escape so classic 8-bit BibTeX sorts and prints it correctly.
@article{https://doi.org/10.48550/arxiv.2007.04825,
  author        = {Vyas, Apoorv and Katharopoulos, Angelos and Fleuret,
                   Fran{\c{c}}ois},
  title         = {Fast Transformers with Clustered Attention},
  publisher     = {arXiv},
  year          = 2020,
  doi           = {10.48550/ARXIV.2007.04825},
  url           = {https://arxiv.org/abs/2007.04825},
  eprint        = {2007.04825},
  archiveprefix = {arXiv},
  keywords      = {Machine Learning (cs.LG), Machine Learning (stat.ML),
                   FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
||
|
||
% Bastings and Filippova (2020), position paper on attention vs. saliency; BlackboxNLP 2020.
@inproceedings{bastings-filippova-2020-elephant,
  author    = {Bastings, Jasmijn and Filippova, Katja},
  title     = {The elephant in the interpretability room: Why use
               attention as explanation when we have saliency methods?},
  booktitle = {Proceedings of the Third BlackboxNLP Workshop on Analyzing
               and Interpreting Neural Networks for NLP},
  month     = nov,
  year      = 2020,
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {149--155},
  doi       = {10.18653/v1/2020.blackboxnlp-1.14},
  url       = {https://aclanthology.org/2020.blackboxnlp-1.14},
  abstract  = {There is a recent surge of interest in using attention as
               explanation of model predictions, with mixed evidence on
               whether attention can be used as such. While attention
               conveniently gives us one weight per input token and is easily
               extracted, it is often unclear toward what goal it is used as
               explanation. We find that often that goal, whether explicitly
               stated or not, is to find out what input tokens are the most
               relevant to a prediction, and that the implied user for the
               explanation is a model developer. For this goal and user, we
               argue that input saliency methods are better suited, and that
               there are no compelling reasons to use attention, despite the
               coincidence that it provides a weight for each input. With
               this position paper, we hope to shift some of the recent focus
               on attention to saliency methods, and for authors to clearly
               state the goal and user for their explanations.},
}
|
||
|
||
% Bachmann et al. (2022), "MultiMAE"; arXiv preprint 2204.01678.
% The model name is braced in the title so sentence-casing styles do not turn it into "Multimae".
@article{https://doi.org/10.48550/arxiv.2204.01678,
  author        = {Bachmann, Roman and Mizrahi, David and Atanov, Andrei and
                   Zamir, Amir},
  title         = {{MultiMAE}: Multi-modal Multi-task Masked Autoencoders},
  publisher     = {arXiv},
  year          = 2022,
  doi           = {10.48550/ARXIV.2204.01678},
  url           = {https://arxiv.org/abs/2204.01678},
  eprint        = {2204.01678},
  archiveprefix = {arXiv},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), Machine
                   Learning (cs.LG), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
||
|
||
% De Waele et al. (2021), "CpG Transformer"; Bioinformatics 38(3).
% Only the model name is braced in the title; the rest is left for the style to case.
@article{10.1093/bioinformatics/btab746,
  author   = {De Waele, Gaetan and Clauwaert, Jim and Menschaert, Gerben
              and Waegeman, Willem},
  title    = {{CpG Transformer} for imputation of single-cell methylomes},
  journal  = {Bioinformatics},
  volume   = 38,
  number   = 3,
  pages    = {597--603},
  year     = 2021,
  month    = oct,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btab746},
  url      = {https://doi.org/10.1093/bioinformatics/btab746},
  eprint   = {https://academic.oup.com/bioinformatics/article-pdf/38/3/597/42167564/btab746.pdf},
  abstract = {The adoption of current single-cell DNA methylation
              sequencing protocols is hindered by incomplete coverage,
              outlining the need for effective imputation techniques. The
              task of imputing single-cell (methylation) data requires
              models to build an understanding of underlying biological
              processes. We adapt the transformer neural network architecture
              to operate on methylation matrices through combining axial
              attention with sliding window self-attention. The obtained CpG
              Transformer displays state-of-the-art performances on a wide
              range of scBS-seq and scRRBS-seq datasets. Furthermore, we
              demonstrate the interpretability of CpG Transformer and
              illustrate its rapid transfer learning properties, allowing
              practitioners to train models on new datasets with a limited
              computational and time budget. CpG Transformer is freely
              available at
              https://github.com/gdewael/cpg-transformer. Supplementary data
              are available at Bioinformatics online.},
}
|
||
|
||
% Rao et al. (2021), "MSA Transformer"; bioRxiv preprint 2021.02.12.430858.
@article{Rao2021.02.12.430858,
  author       = {Rao, Roshan and Liu, Jason and Verkuil, Robert and Meier,
                  Joshua and Canny, John F. and Abbeel, Pieter and Sercu, Tom
                  and Rives, Alexander},
  title        = {{MSA Transformer}},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = 2021,
  elocation-id = {2021.02.12.430858},
  doi          = {10.1101/2021.02.12.430858},
  url          = {https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858},
  eprint       = {https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858.full.pdf},
  abstract     = {Unsupervised protein language models trained across
                  millions of diverse sequences learn structure and function of
                  proteins. Protein language models studied to date have been
                  trained to perform inference from individual sequences. The
                  longstanding approach in computational biology has been to
                  make inferences from a family of evolutionarily related
                  sequences by fitting a model to each family independently. In
                  this work we combine the two paradigms. We introduce a protein
                  language model which takes as input a set of sequences in the
                  form of a multiple sequence alignment. The model interleaves
                  row and column attention across the input sequences and is
                  trained with a variant of the masked language modeling
                  objective across many protein families. The performance of the
                  model surpasses current state-of-the-art unsupervised
                  structure learning methods by a wide margin, with far greater
                  parameter efficiency than prior state-of-the-art protein
                  language models. Competing Interest Statement: The authors have
                  declared no competing interest.},
}
|
||
|
||
% Jumper et al. (2021), AlphaFold; Nature 596(7873).
% The abstract omits the journal's superscript reference numbers, which the export had fused into the text.
@article{Jumper2021,
  author   = {Jumper, John and Evans, Richard and Pritzel, Alexander and
              Green, Tim and Figurnov, Michael and Ronneberger, Olaf and
              Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek,
              Augustin and Potapenko, Anna and Bridgland, Alex and Meyer,
              Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and
              Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov,
              Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor
              and Petersen, Stig and Reiman, David and Clancy, Ellen and
              Zielinski, Michal and Steinegger, Martin and Pacholska,
              Michalina and Berghammer, Tamas and Bodenstein, Sebastian and
              Silver, David and Vinyals, Oriol and Senior, Andrew W. and
              Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
  title    = {Highly accurate protein structure prediction with
              {AlphaFold}},
  journal  = {Nature},
  year     = 2021,
  month    = aug,
  day      = 01,
  volume   = 596,
  number   = 7873,
  pages    = {583--589},
  issn     = {1476-4687},
  doi      = {10.1038/s41586-021-03819-2},
  url      = {https://doi.org/10.1038/s41586-021-03819-2},
  abstract = {Proteins are essential to life, and understanding their
              structure can facilitate a mechanistic understanding of their
              function. Through an enormous experimental effort, the
              structures of around 100,000 unique proteins have been
              determined, but this represents a small fraction of the
              billions of known protein sequences. Structural coverage is
              bottlenecked by the months to years of painstaking effort
              required to determine a single protein structure. Accurate
              computational approaches are needed to address this gap and to
              enable large-scale structural bioinformatics. Predicting the
              three-dimensional structure that a protein will adopt based
              solely on its amino acid sequence---the structure prediction
              component of the `protein folding problem'---has been an
              important open research problem for more than 50 years.
              Despite recent progress, existing methods fall far short
              of atomic accuracy, especially when no homologous structure is
              available. Here we provide the first computational method that
              can regularly predict protein structures with atomic accuracy
              even in cases in which no similar structure is known. We
              validated an entirely redesigned version of our neural
              network-based model, AlphaFold, in the challenging 14th
              Critical Assessment of protein Structure Prediction
              (CASP14), demonstrating accuracy competitive with
              experimental structures in a majority of cases and greatly
              outperforming other methods. Underpinning the latest version
              of AlphaFold is a novel machine learning approach that
              incorporates physical and biological knowledge about protein
              structure, leveraging multi-sequence alignments, into the
              design of the deep learning algorithm.},
}
|
||
|
||
% Ashuach et al. (2021), "MultiVI"; bioRxiv preprint 2021.08.20.457057.
@article{Ashuach2021.08.20.457057,
  author       = {Ashuach, Tal and Gabitto, Mariano I. and Jordan, Michael I.
                  and Yosef, Nir},
  title        = {{MultiVI}: deep generative model for the integration of
                  multi-modal data},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = 2021,
  elocation-id = {2021.08.20.457057},
  doi          = {10.1101/2021.08.20.457057},
  url          = {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057},
  eprint       = {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057.full.pdf},
  abstract     = {Jointly profiling the transcriptional and chromatin
                  accessibility landscapes of single-cells is a powerful
                  technique to characterize cellular populations. Here we
                  present MultiVI, a probabilistic model to analyze such
                  multiomic data and integrate it with single modality datasets.
                  MultiVI creates a joint representation that accurately
                  reflects both chromatin and transcriptional properties of the
                  cells even when one modality is missing. It also imputes
                  missing data, corrects for batch effects and is available in
                  the scvi-tools framework:
                  https://docs.scvi-tools.org/. Competing Interest Statement: The
                  authors have declared no competing interest.},
}
|
||
|
||
% Gong, Zhou and Purdom (2021), "Cobolt"; Genome Biology 22(1), article 351.
@article{Gong2021,
  author   = {Gong, Boying and Zhou, Yun and Purdom, Elizabeth},
  title    = {Cobolt: integrative analysis of multimodal single-cell
              sequencing data},
  journal  = {Genome Biology},
  year     = 2021,
  month    = dec,
  day      = 28,
  volume   = 22,
  number   = 1,
  pages    = 351,
  issn     = {1474-760X},
  doi      = {10.1186/s13059-021-02556-z},
  url      = {https://doi.org/10.1186/s13059-021-02556-z},
  abstract = {A growing number of single-cell sequencing platforms enable
              joint profiling of multiple omics from the same cells. We
              present Cobolt, a novel method that not only allows for
              analyzing the data from joint-modality platforms, but provides
              a coherent framework for the integration of multiple datasets
              measured on different modalities. We demonstrate its
              performance on multi-modality data of gene expression and
              chromatin accessibility and illustrate the integration
              abilities of Cobolt by jointly analyzing this multi-modality
              data with single-cell RNA-seq and ATAC-seq datasets.},
}
|
||
|
||
% Bredikhin, Kats and Stegle (2022), "MUON"; Genome Biology 23(1), article 42.
% The acronym is braced in the title so sentence-casing styles do not turn it into "Muon".
@article{Bredikhin2022,
  author   = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver},
  title    = {{MUON}: multimodal omics analysis framework},
  journal  = {Genome Biology},
  year     = 2022,
  month    = feb,
  day      = 01,
  volume   = 23,
  number   = 1,
  pages    = 42,
  issn     = {1474-760X},
  doi      = {10.1186/s13059-021-02577-8},
  url      = {https://doi.org/10.1186/s13059-021-02577-8},
  abstract = {Advances in multi-omics have led to an explosion of
              multimodal datasets to address questions from basic biology to
              translation. While these data provide novel opportunities for
              discovery, they also pose management and analysis challenges,
              thus motivating the development of tailored computational
              solutions. Here, we present a data standard and an analysis
              framework for multi-omics, MUON, designed to organise,
              analyse, visualise, and exchange multimodal data. MUON stores
              multimodal data in an efficient yet flexible and interoperable
              data structure. MUON enables a versatile range of analyses,
              from data preprocessing to flexible multi-omics alignment.},
}
|
||
|
||
% Lance et al. (2022), NeurIPS 2021 multimodal single-cell integration challenge report; PMLR v176.
% The participant consortium is braced as one corporate author so BibTeX does not split it into surname and given name.
@inproceedings{pmlr-v176-lance22a,
  author    = {Lance, Christopher and Luecken, Malte D. and Burkhardt,
               Daniel B. and Cannoodt, Robrecht and Rautenstrauch, Pia and
               Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and
               Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh,
               Nikolay and Ryazantsev, Gleb and Ohler, Uwe and
               {NeurIPS 2021 Multimodal data integration competition participants}
               and Pisco, Angela Oliveira and Bloom, Jonathan and
               Krishnaswamy, Smita and Theis, Fabian J.},
  title     = {Multimodal single cell data integration challenge: Results
               and lessons learned},
  booktitle = {Proceedings of the NeurIPS 2021 Competitions and
               Demonstrations Track},
  editor    = {Kiela, Douwe and Ciccone, Marco and Caputo, Barbara},
  series    = {Proceedings of Machine Learning Research},
  volume    = 176,
  pages     = {162--176},
  year      = 2022,
  month     = {06--14 Dec},
  publisher = {PMLR},
  pdf       = {https://proceedings.mlr.press/v176/lance22a/lance22a.pdf},
  url       = {https://proceedings.mlr.press/v176/lance22a.html},
  abstract  = {Biology has become a data-intensive science. Recent
               technological advances in single-cell genomics have enabled
               the measurement of multiple facets of cellular state,
               producing datasets with millions of single-cell observations.
               While these data hold great promise for understanding
               molecular mechanisms in health and disease, analysis
               challenges arising from sparsity, technical and biological
               variability, and high dimensionality of the data hinder the
               derivation of such mechanistic insights. To promote the
               innovation of algorithms for analysis of multimodal
               single-cell data, we organized a competition at NeurIPS 2021
               applying the Common Task Framework to multimodal single-cell
               data integration. For this competition we generated the first
               multimodal benchmarking dataset for single-cell biology and
               defined three tasks in this domain: prediction of missing
               modalities, aligning modalities, and learning a joint
               representation across modalities. We further specified
               evaluation metrics and developed a cloud-based algorithm
               evaluation pipeline. Using this setup, 280 competitors
               submitted over 2600 proposed solutions within a 3 month
               period, showcasing substantial innovation especially in the
               modality alignment task. Here, we present the results,
               describe trends of well performing approaches, and discuss
               challenges associated with running the competition.},
}
|