2022-10-13 13:35:20 +02:00
|
|
|
|
@misc{https://doi.org/10.48550/arxiv.1706.03762,
  author        = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
                   Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and
                   Kaiser, Lukasz and Polosukhin, Illia},
  title         = {Attention Is All You Need},
  publisher     = {arXiv},
  year          = 2017,
  eprint        = {1706.03762},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/ARXIV.1706.03762},
  url           = {https://arxiv.org/abs/1706.03762},
  keywords      = {Computation and Language (cs.CL), Machine Learning (cs.LG),
                   FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
|
|
|
|
|
|
|
|
|
@misc{https://doi.org/10.48550/arxiv.1912.12180,
  author        = {Ho, Jonathan and Kalchbrenner, Nal and Weissenborn, Dirk
                   and Salimans, Tim},
  title         = {Axial Attention in Multidimensional Transformers},
  publisher     = {arXiv},
  year          = 2019,
  eprint        = {1912.12180},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  doi           = {10.48550/ARXIV.1912.12180},
  url           = {https://arxiv.org/abs/1912.12180},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS:
                   Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
|
|
|
|
|
|
|
|
|
@misc{https://doi.org/10.48550/arxiv.2004.05150,
  author        = {Beltagy, Iz and Peters, Matthew E. and Cohan, Arman},
  title         = {Longformer: The Long-Document Transformer},
  publisher     = {arXiv},
  year          = 2020,
  eprint        = {2004.05150},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/ARXIV.2004.05150},
  url           = {https://arxiv.org/abs/2004.05150},
  keywords      = {Computation and Language (cs.CL), FOS: Computer and
                   information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
|
|
|
|
|
|
|
|
|
@misc{https://doi.org/10.48550/arxiv.1901.02860,
  author        = {Dai, Zihang and Yang, Zhilin and Yang, Yiming and
                   Carbonell, Jaime and Le, Quoc V. and Salakhutdinov, Ruslan},
  title         = {{Transformer-XL}: Attentive Language Models Beyond a
                   Fixed-Length Context},
  publisher     = {arXiv},
  year          = 2019,
  eprint        = {1901.02860},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/ARXIV.1901.02860},
  url           = {https://arxiv.org/abs/1901.02860},
  keywords      = {Machine Learning (cs.LG), Computation and Language (cs.CL),
                   Machine Learning (stat.ML), FOS: Computer and information
                   sciences},
  copyright     = {Creative Commons Attribution Non Commercial Share Alike 4.0
                   International},
}
|
|
|
|
|
|
|
|
|
|
@inproceedings{devlin-etal-2019-bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and
               Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for
               Language Understanding},
  booktitle = {Proceedings of the 2019 Conference of the North {American}
               Chapter of the Association for Computational Linguistics:
               Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = jun,
  year      = 2019,
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages     = {4171--4186},
  doi       = {10.18653/v1/N19-1423},
  url       = {https://aclanthology.org/N19-1423},
  abstract  = {We introduce a new language representation model called
               BERT, which stands for Bidirectional Encoder Representations
               from Transformers. Unlike recent language representation
               models (Peters et al., 2018a; Radford et al., 2018), BERT is
               designed to pre-train deep bidirectional representations from
               unlabeled text by jointly conditioning on both left and right
               context in all layers. As a result, the pre-trained BERT model
               can be fine-tuned with just one additional output layer to
               create state-of-the-art models for a wide range of tasks, such
               as question answering and language inference, without
               substantial task-specific architecture modifications. BERT is
               conceptually simple and empirically powerful. It obtains new
               state-of-the-art results on eleven natural language processing
               tasks, including pushing the GLUE score to 80.5 (7.7 point
               absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%}
               absolute improvement), SQuAD v1.1 question answering Test F1
               to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test
               F1 to 83.1 (5.1 point absolute improvement).},
}
|
|
|
|
|
|
|
|
|
|
@misc{https://doi.org/10.48550/arxiv.2007.04825,
  author        = {Vyas, Apoorv and Katharopoulos, Angelos and Fleuret,
                   Fran{\c{c}}ois},
  title         = {Fast Transformers with Clustered Attention},
  publisher     = {arXiv},
  year          = 2020,
  eprint        = {2007.04825},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/ARXIV.2007.04825},
  url           = {https://arxiv.org/abs/2007.04825},
  keywords      = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS:
                   Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
|
|
|
|
|
|
|
|
|
@inproceedings{bastings-filippova-2020-elephant,
  author    = {Bastings, Jasmijn and Filippova, Katja},
  title     = {The elephant in the interpretability room: Why use
               attention as explanation when we have saliency methods?},
  booktitle = {Proceedings of the Third {BlackboxNLP} Workshop on Analyzing
               and Interpreting Neural Networks for {NLP}},
  month     = nov,
  year      = 2020,
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {149--155},
  doi       = {10.18653/v1/2020.blackboxnlp-1.14},
  url       = {https://aclanthology.org/2020.blackboxnlp-1.14},
  abstract  = {There is a recent surge of interest in using attention as
               explanation of model predictions, with mixed evidence on
               whether attention can be used as such. While attention
               conveniently gives us one weight per input token and is easily
               extracted, it is often unclear toward what goal it is used as
               explanation. We find that often that goal, whether explicitly
               stated or not, is to find out what input tokens are the most
               relevant to a prediction, and that the implied user for the
               explanation is a model developer. For this goal and user, we
               argue that input saliency methods are better suited, and that
               there are no compelling reasons to use attention, despite the
               coincidence that it provides a weight for each input. With
               this position paper, we hope to shift some of the recent focus
               on attention to saliency methods, and for authors to clearly
               state the goal and user for their explanations.},
}
|
|
|
|
|
|
2022-10-20 14:23:02 +02:00
|
|
|
|
@misc{https://doi.org/10.48550/arxiv.2204.01678,
  author        = {Bachmann, Roman and Mizrahi, David and Atanov, Andrei and
                   Zamir, Amir},
  title         = {{MultiMAE}: Multi-modal Multi-task Masked Autoencoders},
  publisher     = {arXiv},
  year          = 2022,
  eprint        = {2204.01678},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  doi           = {10.48550/ARXIV.2204.01678},
  url           = {https://arxiv.org/abs/2204.01678},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), Machine
                   Learning (cs.LG), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
|
|
|
|
|
|
|
|
|
|
@article{10.1093/bioinformatics/btab746,
  author   = {De Waele, Gaetan and Clauwaert, Jim and Menschaert, Gerben
              and Waegeman, Willem},
  title    = {{CpG} Transformer for imputation of single-cell methylomes},
  journal  = {Bioinformatics},
  volume   = 38,
  number   = 3,
  pages    = {597--603},
  year     = 2021,
  month    = oct,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btab746},
  url      = {https://doi.org/10.1093/bioinformatics/btab746},
  eprint   = {https://academic.oup.com/bioinformatics/article-pdf/38/3/597/42167564/btab746.pdf},
  abstract = {The adoption of current single-cell DNA methylation
              sequencing protocols is hindered by incomplete coverage,
              outlining the need for effective imputation techniques. The
              task of imputing single-cell (methylation) data requires
              models to build an understanding of underlying biological
              processes. We adapt the transformer neural network architecture
              to operate on methylation matrices through combining axial
              attention with sliding window self-attention. The obtained CpG
              Transformer displays state-of-the-art performances on a wide
              range of scBS-seq and scRRBS-seq datasets. Furthermore, we
              demonstrate the interpretability of CpG Transformer and
              illustrate its rapid transfer learning properties, allowing
              practitioners to train models on new datasets with a limited
              computational and time budget. CpG Transformer is freely
              available at https://github.com/gdewael/cpg-transformer.
              Supplementary data are available at Bioinformatics online.},
}
|
|
|
|
|
|
2022-10-13 13:35:20 +02:00
|
|
|
|
@article{Rao2021.02.12.430858,
  author       = {Rao, Roshan and Liu, Jason and Verkuil, Robert and Meier,
                  Joshua and Canny, John F. and Abbeel, Pieter and Sercu, Tom
                  and Rives, Alexander},
  title        = {{MSA} Transformer},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = 2021,
  elocation-id = {2021.02.12.430858},
  doi          = {10.1101/2021.02.12.430858},
  url          = {https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858},
  eprint       = {https://www.biorxiv.org/content/early/2021/08/27/2021.02.12.430858.full.pdf},
  abstract     = {Unsupervised protein language models trained across
                  millions of diverse sequences learn structure and function of
                  proteins. Protein language models studied to date have been
                  trained to perform inference from individual sequences. The
                  longstanding approach in computational biology has been to
                  make inferences from a family of evolutionarily related
                  sequences by fitting a model to each family independently. In
                  this work we combine the two paradigms. We introduce a protein
                  language model which takes as input a set of sequences in the
                  form of a multiple sequence alignment. The model interleaves
                  row and column attention across the input sequences and is
                  trained with a variant of the masked language modeling
                  objective across many protein families. The performance of the
                  model surpasses current state-of-the-art unsupervised
                  structure learning methods by a wide margin, with far greater
                  parameter efficiency than prior state-of-the-art protein
                  language models. Competing Interest Statement: The authors have
                  declared no competing interest.},
}
|
|
|
|
|
|
|
|
|
|
@article{Jumper2021,
  author   = {Jumper, John and Evans, Richard and Pritzel, Alexander and
              Green, Tim and Figurnov, Michael and Ronneberger, Olaf and
              Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'i}dek,
              Augustin and Potapenko, Anna and Bridgland, Alex and Meyer,
              Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and
              Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov,
              Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor
              and Petersen, Stig and Reiman, David and Clancy, Ellen and
              Zielinski, Michal and Steinegger, Martin and Pacholska,
              Michalina and Berghammer, Tamas and Bodenstein, Sebastian and
              Silver, David and Vinyals, Oriol and Senior, Andrew W. and
              Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
  title    = {Highly accurate protein structure prediction with
              {AlphaFold}},
  journal  = {Nature},
  year     = 2021,
  month    = aug,
  day      = 01,
  volume   = 596,
  number   = 7873,
  pages    = {583--589},
  issn     = {1476-4687},
  doi      = {10.1038/s41586-021-03819-2},
  url      = {https://doi.org/10.1038/s41586-021-03819-2},
  abstract = {Proteins are essential to life, and understanding their
              structure can facilitate a mechanistic understanding of their
              function. Through an enormous experimental effort, the
              structures of around 100,000 unique proteins have been
              determined, but this represents a small fraction of the
              billions of known protein sequences. Structural coverage is
              bottlenecked by the months to years of painstaking effort
              required to determine a single protein structure. Accurate
              computational approaches are needed to address this gap and to
              enable large-scale structural bioinformatics. Predicting the
              three-dimensional structure that a protein will adopt based
              solely on its amino acid sequence---the structure prediction
              component of the `protein folding problem'---has been an
              important open research problem for more than 50 years.
              Despite recent progress, existing methods fall far short
              of atomic accuracy, especially when no homologous structure is
              available. Here we provide the first computational method that
              can regularly predict protein structures with atomic accuracy
              even in cases in which no similar structure is known. We
              validated an entirely redesigned version of our neural
              network-based model, AlphaFold, in the challenging 14th
              Critical Assessment of protein Structure Prediction
              (CASP14), demonstrating accuracy competitive with
              experimental structures in a majority of cases and greatly
              outperforming other methods. Underpinning the latest version
              of AlphaFold is a novel machine learning approach that
              incorporates physical and biological knowledge about protein
              structure, leveraging multi-sequence alignments, into the
              design of the deep learning algorithm.},
}
|
2022-10-20 14:23:02 +02:00
|
|
|
|
|
|
|
|
|
@article{Ashuach2021.08.20.457057,
  author       = {Ashuach, Tal and Gabitto, Mariano I. and Jordan, Michael I.
                  and Yosef, Nir},
  title        = {{MultiVI}: deep generative model for the integration of
                  multi-modal data},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = 2021,
  elocation-id = {2021.08.20.457057},
  doi          = {10.1101/2021.08.20.457057},
  url          = {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057},
  eprint       = {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057.full.pdf},
  abstract     = {Jointly profiling the transcriptional and chromatin
                  accessibility landscapes of single-cells is a powerful
                  technique to characterize cellular populations. Here we
                  present MultiVI, a probabilistic model to analyze such
                  multiomic data and integrate it with single modality datasets.
                  MultiVI creates a joint representation that accurately
                  reflects both chromatin and transcriptional properties of the
                  cells even when one modality is missing. It also imputes
                  missing data, corrects for batch effects and is available in
                  the scvi-tools framework: https://docs.scvi-tools.org/.
                  Competing Interest Statement: The authors have declared no
                  competing interest.},
}
|
|
|
|
|
|
|
|
|
|
@article{Gong2021,
  author   = {Gong, Boying and Zhou, Yun and Purdom, Elizabeth},
  title    = {Cobolt: integrative analysis of multimodal single-cell
              sequencing data},
  journal  = {Genome Biology},
  year     = 2021,
  month    = dec,
  day      = 28,
  volume   = 22,
  number   = 1,
  pages    = {351},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-021-02556-z},
  url      = {https://doi.org/10.1186/s13059-021-02556-z},
  abstract = {A growing number of single-cell sequencing platforms enable
              joint profiling of multiple omics from the same cells. We
              present Cobolt, a novel method that not only allows for
              analyzing the data from joint-modality platforms, but provides
              a coherent framework for the integration of multiple datasets
              measured on different modalities. We demonstrate its
              performance on multi-modality data of gene expression and
              chromatin accessibility and illustrate the integration
              abilities of Cobolt by jointly analyzing this multi-modality
              data with single-cell RNA-seq and ATAC-seq datasets.},
}
|
|
|
|
|
|
|
|
|
|
@article{Bredikhin2022,
  author   = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver},
  title    = {{MUON}: multimodal omics analysis framework},
  journal  = {Genome Biology},
  year     = 2022,
  month    = feb,
  day      = 01,
  volume   = 23,
  number   = 1,
  pages    = {42},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-021-02577-8},
  url      = {https://doi.org/10.1186/s13059-021-02577-8},
  abstract = {Advances in multi-omics have led to an explosion of
              multimodal datasets to address questions from basic biology to
              translation. While these data provide novel opportunities for
              discovery, they also pose management and analysis challenges,
              thus motivating the development of tailored computational
              solutions. Here, we present a data standard and an analysis
              framework for multi-omics, MUON, designed to organise,
              analyse, visualise, and exchange multimodal data. MUON stores
              multimodal data in an efficient yet flexible and interoperable
              data structure. MUON enables a versatile range of analyses,
              from data preprocessing to flexible multi-omics alignment.},
}
|
|
|
|
|
|
|
|
|
|
@inproceedings{pmlr-v176-lance22a,
  author    = {Lance, Christopher and Luecken, Malte D. and Burkhardt,
               Daniel B. and Cannoodt, Robrecht and Rautenstrauch, Pia and
               Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and
               Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh,
               Nikolay and Ryazantsev, Gleb and Ohler, Uwe and
               {NeurIPS 2021 Multimodal data integration competition participants}
               and Pisco, Angela Oliveira and Bloom, Jonathan and
               Krishnaswamy, Smita and Theis, Fabian J.},
  title     = {Multimodal single cell data integration challenge: Results
               and lessons learned},
  booktitle = {Proceedings of the {NeurIPS} 2021 Competitions and
               Demonstrations Track},
  editor    = {Kiela, Douwe and Ciccone, Marco and Caputo, Barbara},
  series    = {Proceedings of Machine Learning Research},
  volume    = 176,
  pages     = {162--176},
  year      = 2022,
  month     = dec,
  eventdate = {2021-12-06/2021-12-14},
  publisher = {PMLR},
  pdf       = {https://proceedings.mlr.press/v176/lance22a/lance22a.pdf},
  url       = {https://proceedings.mlr.press/v176/lance22a.html},
  abstract  = {Biology has become a data-intensive science. Recent
               technological advances in single-cell genomics have enabled
               the measurement of multiple facets of cellular state,
               producing datasets with millions of single-cell observations.
               While these data hold great promise for understanding
               molecular mechanisms in health and disease, analysis
               challenges arising from sparsity, technical and biological
               variability, and high dimensionality of the data hinder the
               derivation of such mechanistic insights. To promote the
               innovation of algorithms for analysis of multimodal
               single-cell data, we organized a competition at NeurIPS 2021
               applying the Common Task Framework to multimodal single-cell
               data integration. For this competition we generated the first
               multimodal benchmarking dataset for single-cell biology and
               defined three tasks in this domain: prediction of missing
               modalities, aligning modalities, and learning a joint
               representation across modalities. We further specified
               evaluation metrics and developed a cloud-based algorithm
               evaluation pipeline. Using this setup, 280 competitors
               submitted over 2600 proposed solutions within a 3 month
               period, showcasing substantial innovation especially in the
               modality alignment task. Here, we present the results,
               describe trends of well performing approaches, and discuss
               challenges associated with running the competition.},
}
|