% Bibliography database: attention / transformer papers and multimodal
% single-cell analysis methods.
%
% Conventions used in this file:
%   - Citation keys are kept exactly as exported by the publishers, so
%     documents that already cite them keep resolving.
%   - One field per line, brace-delimited values.
%   - month uses the predefined three-letter macros (unquoted);
%     page ranges use a double hyphen.
%   - arXiv preprints are typed as misc (they have no journal) and carry
%     eprint / archiveprefix / primaryclass.
%   - Words in titles that must keep their capitalisation are wrapped in
%     braces; titles are never wrapped as a whole.
%   - Links to full-text PDFs live in the (style-ignored) pdf field rather
%     than in eprint, which eprint-aware styles render as a preprint id.

@misc{https://doi.org/10.48550/arxiv.1706.03762,
  author        = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia},
  title         = {Attention Is All You Need},
  year          = {2017},
  publisher     = {arXiv},
  eprint        = {1706.03762},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/ARXIV.1706.03762},
  url           = {https://arxiv.org/abs/1706.03762},
  keywords      = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}

@misc{https://doi.org/10.48550/arxiv.1912.12180,
  author        = {Ho, Jonathan and Kalchbrenner, Nal and Weissenborn, Dirk and Salimans, Tim},
  title         = {Axial Attention in Multidimensional Transformers},
  year          = {2019},
  publisher     = {arXiv},
  eprint        = {1912.12180},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  doi           = {10.48550/ARXIV.1912.12180},
  url           = {https://arxiv.org/abs/1912.12180},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}

@misc{https://doi.org/10.48550/arxiv.2004.05150,
  author        = {Beltagy, Iz and Peters, Matthew E. and Cohan, Arman},
  title         = {{Longformer}: The Long-Document Transformer},
  year          = {2020},
  publisher     = {arXiv},
  eprint        = {2004.05150},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/ARXIV.2004.05150},
  url           = {https://arxiv.org/abs/2004.05150},
  keywords      = {Computation and Language (cs.CL), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}

@misc{https://doi.org/10.48550/arxiv.1901.02860,
  author        = {Dai, Zihang and Yang, Zhilin and Yang, Yiming and Carbonell, Jaime and Le, Quoc V. and Salakhutdinov, Ruslan},
  title         = {{Transformer-XL}: Attentive Language Models Beyond a Fixed-Length Context},
  year          = {2019},
  publisher     = {arXiv},
  eprint        = {1901.02860},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/ARXIV.1901.02860},
  url           = {https://arxiv.org/abs/1901.02860},
  keywords      = {Machine Learning (cs.LG), Computation and Language (cs.CL), Machine Learning (stat.ML), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution Non Commercial Share Alike 4.0 International},
}

@inproceedings{devlin-etal-2019-bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  booktitle = {Proceedings of the 2019 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages     = {4171--4186},
  doi       = {10.18653/v1/N19-1423},
  url       = {https://aclanthology.org/N19-1423},
  abstract  = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%} absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).},
}

% The author's given name uses a LaTeX escape so the entry also works with
% classic 8-bit BibTeX (same convention as the accented name in Jumper2021).
@misc{https://doi.org/10.48550/arxiv.2007.04825,
  author        = {Vyas, Apoorv and Katharopoulos, Angelos and Fleuret, Fran{\c{c}}ois},
  title         = {Fast Transformers with Clustered Attention},
  year          = {2020},
  publisher     = {arXiv},
  eprint        = {2007.04825},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/ARXIV.2007.04825},
  url           = {https://arxiv.org/abs/2007.04825},
  keywords      = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}

@inproceedings{bastings-filippova-2020-elephant,
  author    = {Bastings, Jasmijn and Filippova, Katja},
  title     = {The elephant in the interpretability room: Why use attention as explanation when we have saliency methods?},
  booktitle = {Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {149--155},
  doi       = {10.18653/v1/2020.blackboxnlp-1.14},
  url       = {https://aclanthology.org/2020.blackboxnlp-1.14},
  abstract  = {There is a recent surge of interest in using attention as explanation of model predictions, with mixed evidence on whether attention can be used as such. While attention conveniently gives us one weight per input token and is easily extracted, it is often unclear toward what goal it is used as explanation. We find that often that goal, whether explicitly stated or not, is to find out what input tokens are the most relevant to a prediction, and that the implied user for the explanation is a model developer. For this goal and user, we argue that input saliency methods are better suited, and that there are no compelling reasons to use attention, despite the coincidence that it provides a weight for each input. With this position paper, we hope to shift some of the recent focus on attention to saliency methods, and for authors to clearly state the goal and user for their explanations.},
}

@misc{https://doi.org/10.48550/arxiv.2204.01678,
  author        = {Bachmann, Roman and Mizrahi, David and Atanov, Andrei and Zamir, Amir},
  title         = {{MultiMAE}: Multi-modal Multi-task Masked Autoencoders},
  year          = {2022},
  publisher     = {arXiv},
  eprint        = {2204.01678},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  doi           = {10.48550/ARXIV.2204.01678},
  url           = {https://arxiv.org/abs/2204.01678},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), Machine Learning (cs.LG), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}

% year/month are kept as exported by the publisher (month 10 -> oct).
% NOTE(review): volume 38, issue 3 may carry a later print date than the
% exported year -- confirm against the journal before changing it.
@article{10.1093/bioinformatics/btab746,
  author   = {De Waele, Gaetan and Clauwaert, Jim and Menschaert, Gerben and Waegeman, Willem},
  title    = {{CpG} Transformer for imputation of single-cell methylomes},
  journal  = {Bioinformatics},
  volume   = {38},
  number   = {3},
  pages    = {597--603},
  year     = {2021},
  month    = oct,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btab746},
  url      = {https://doi.org/10.1093/bioinformatics/btab746},
  pdf      = {https://academic.oup.com/bioinformatics/article-pdf/38/3/597/42167564/btab746.pdf},
  abstract = {The adoption of current single-cell DNA methylation sequencing protocols is hindered by incomplete coverage, outlining the need for effective imputation techniques. The task of imputing single-cell (methylation) data requires models to build an understanding of underlying biological processes. We adapt the transformer neural network architecture to operate on methylation matrices through combining axial attention with sliding window self-attention. The obtained CpG Transformer displays state-of-the-art performances on a wide range of scBS-seq and scRRBS-seq datasets. Furthermore, we demonstrate the interpretability of CpG Transformer and illustrate its rapid transfer learning properties, allowing practitioners to train models on new datasets with a limited computational and time budget. CpG Transformer is freely available at https://github.com/gdewael/cpg-transformer. Supplementary data are available at Bioinformatics online.},
}

@article{Rao2021.02.12.430858,
  author        = {Rao, Roshan and Liu, Jason and Verkuil, Robert and Meier, Joshua and Canny, John F. and Abbeel, Pieter and Sercu, Tom and Rives, Alexander},
  title         = {{MSA} Transformer},
  journal       = {bioRxiv},
  year          = {2021},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {2021.02.12.430858},
  doi           = {10.1101/2021.02.12.430858},
  url           = {https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858},
  pdf           = {https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858.full.pdf},
  abstract      = {Unsupervised protein language models trained across millions of diverse sequences learn structure and function of proteins. Protein language models studied to date have been trained to perform inference from individual sequences. The longstanding approach in computational biology has been to make inferences from a family of evolutionarily related sequences by fitting a model to each family independently. In this work we combine the two paradigms. We introduce a protein language model which takes as input a set of sequences in the form of a multiple sequence alignment. The model interleaves row and column attention across the input sequences and is trained with a variant of the masked language modeling objective across many protein families. The performance of the model surpasses current state-of-the-art unsupervised structure learning methods by a wide margin, with far greater parameter efficiency than prior state-of-the-art protein language models. Competing Interest Statement: The authors have declared no competing interest.},
}

% The abstract had reference-list numerals fused into the running text by the
% export (e.g. after "effort", "determined", "CASP14"); they are removed here.
@article{Jumper2021,
  author   = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'i}dek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W. and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
  title    = {Highly accurate protein structure prediction with {AlphaFold}},
  journal  = {Nature},
  year     = {2021},
  month    = aug,
  day      = {01},
  volume   = {596},
  number   = {7873},
  pages    = {583--589},
  issn     = {1476-4687},
  doi      = {10.1038/s41586-021-03819-2},
  url      = {https://doi.org/10.1038/s41586-021-03819-2},
  abstract = {Proteins are essential to life, and understanding their structure can facilitate a mechanistic understanding of their function. Through an enormous experimental effort, the structures of around 100,000 unique proteins have been determined, but this represents a small fraction of the billions of known protein sequences. Structural coverage is bottlenecked by the months to years of painstaking effort required to determine a single protein structure. Accurate computational approaches are needed to address this gap and to enable large-scale structural bioinformatics. Predicting the three-dimensional structure that a protein will adopt based solely on its amino acid sequence---the structure prediction component of the `protein folding problem'---has been an important open research problem for more than 50 years. Despite recent progress, existing methods fall far short of atomic accuracy, especially when no homologous structure is available. Here we provide the first computational method that can regularly predict protein structures with atomic accuracy even in cases in which no similar structure is known. We validated an entirely redesigned version of our neural network-based model, AlphaFold, in the challenging 14th Critical Assessment of protein Structure Prediction (CASP14), demonstrating accuracy competitive with experimental structures in a majority of cases and greatly outperforming other methods. Underpinning the latest version of AlphaFold is a novel machine learning approach that incorporates physical and biological knowledge about protein structure, leveraging multi-sequence alignments, into the design of the deep learning algorithm.},
}

@article{Ashuach2021.08.20.457057,
  author        = {Ashuach, Tal and Gabitto, Mariano I. and Jordan, Michael I. and Yosef, Nir},
  title         = {{MultiVI}: deep generative model for the integration of multi-modal data},
  journal       = {bioRxiv},
  year          = {2021},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {2021.08.20.457057},
  doi           = {10.1101/2021.08.20.457057},
  url           = {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057},
  pdf           = {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057.full.pdf},
  abstract      = {Jointly profiling the transcriptional and chromatin accessibility landscapes of single-cells is a powerful technique to characterize cellular populations. Here we present MultiVI, a probabilistic model to analyze such multiomic data and integrate it with single modality datasets. MultiVI creates a joint representation that accurately reflects both chromatin and transcriptional properties of the cells even when one modality is missing. It also imputes missing data, corrects for batch effects and is available in the scvi-tools framework: https://docs.scvi-tools.org/. Competing Interest Statement: The authors have declared no competing interest.},
}

% pages holds the article number (this journal has no page ranges).
@article{Gong2021,
  author   = {Gong, Boying and Zhou, Yun and Purdom, Elizabeth},
  title    = {{Cobolt}: integrative analysis of multimodal single-cell sequencing data},
  journal  = {Genome Biology},
  year     = {2021},
  month    = dec,
  day      = {28},
  volume   = {22},
  number   = {1},
  pages    = {351},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-021-02556-z},
  url      = {https://doi.org/10.1186/s13059-021-02556-z},
  abstract = {A growing number of single-cell sequencing platforms enable joint profiling of multiple omics from the same cells. We present Cobolt, a novel method that not only allows for analyzing the data from joint-modality platforms, but provides a coherent framework for the integration of multiple datasets measured on different modalities. We demonstrate its performance on multi-modality data of gene expression and chromatin accessibility and illustrate the integration abilities of Cobolt by jointly analyzing this multi-modality data with single-cell RNA-seq and ATAC-seq datasets.},
}

% pages holds the article number (this journal has no page ranges).
@article{Bredikhin2022,
  author   = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver},
  title    = {{MUON}: multimodal omics analysis framework},
  journal  = {Genome Biology},
  year     = {2022},
  month    = feb,
  day      = {01},
  volume   = {23},
  number   = {1},
  pages    = {42},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-021-02577-8},
  url      = {https://doi.org/10.1186/s13059-021-02577-8},
  abstract = {Advances in multi-omics have led to an explosion of multimodal datasets to address questions from basic biology to translation. While these data provide novel opportunities for discovery, they also pose management and analysis challenges, thus motivating the development of tailored computational solutions. Here, we present a data standard and an analysis framework for multi-omics, MUON, designed to organise, analyse, visualise, and exchange multimodal data. MUON stores multimodal data in an efficient yet flexible and interoperable data structure. MUON enables a versatile range of analyses, from data preprocessing to flexible multi-omics alignment.},
}

% The consortium author is wrapped in its own braces so it is parsed as one
% indivisible name instead of being split into last/first parts.
% The exported month held a day range ("06--14 Dec"); month now uses the
% macro and the range is kept in eventdate (year taken from the booktitle).
@inproceedings{pmlr-v176-lance22a,
  author    = {Lance, Christopher and Luecken, Malte D. and Burkhardt, Daniel B. and Cannoodt, Robrecht and Rautenstrauch, Pia and Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh, Nikolay and Ryazantsev, Gleb and Ohler, Uwe and {NeurIPS 2021 Multimodal data integration competition participants} and Pisco, Angela Oliveira and Bloom, Jonathan and Krishnaswamy, Smita and Theis, Fabian J.},
  title     = {Multimodal single cell data integration challenge: Results and lessons learned},
  booktitle = {Proceedings of the {NeurIPS} 2021 Competitions and Demonstrations Track},
  editor    = {Kiela, Douwe and Ciccone, Marco and Caputo, Barbara},
  series    = {Proceedings of Machine Learning Research},
  volume    = {176},
  pages     = {162--176},
  year      = {2022},
  month     = dec,
  eventdate = {2021-12-06/2021-12-14},
  publisher = {PMLR},
  pdf       = {https://proceedings.mlr.press/v176/lance22a/lance22a.pdf},
  url       = {https://proceedings.mlr.press/v176/lance22a.html},
  abstract  = {Biology has become a data-intensive science. Recent technological advances in single-cell genomics have enabled the measurement of multiple facets of cellular state, producing datasets with millions of single-cell observations. While these data hold great promise for understanding molecular mechanisms in health and disease, analysis challenges arising from sparsity, technical and biological variability, and high dimensionality of the data hinder the derivation of such mechanistic insights. To promote the innovation of algorithms for analysis of multimodal single-cell data, we organized a competition at NeurIPS 2021 applying the Common Task Framework to multimodal single-cell data integration. For this competition we generated the first multimodal benchmarking dataset for single-cell biology and defined three tasks in this domain: prediction of missing modalities, aligning modalities, and learning a joint representation across modalities. We further specified evaluation metrics and developed a cloud-based algorithm evaluation pipeline. Using this setup, 280 competitors submitted over 2600 proposed solutions within a 3 month period, showcasing substantial innovation especially in the modality alignment task. Here, we present the results, describe trends of well performing approaches, and discuss challenges associated with running the competition.},
}