diff --git a/docs/Bibliography.org b/docs/Bibliography.org index 5340741..5aeb15d 100644 --- a/docs/Bibliography.org +++ b/docs/Bibliography.org @@ -5,8 +5,7 @@ #+OPTIONS: <:nil c:nil todo:nil H:5 #+auto_tangle: t * Deep Learning -** Transformers -*** Attention is All You Need +** Attention is All You Need #+begin_src bibtex @article{https://doi.org/10.48550/arxiv.1706.03762, doi = {10.48550/ARXIV.1706.03762}, @@ -25,7 +24,7 @@ #+end_src #+LaTeX: \printbibliography[heading=none] -*** Axial Attention in Multidimensional Transformers +** Axial Attention in Multidimensional Transformers #+begin_src bibtex @article{https://doi.org/10.48550/arxiv.1912.12180, doi = {10.48550/ARXIV.1912.12180}, @@ -41,7 +40,7 @@ copyright = {arXiv.org perpetual, non-exclusive license} } #+end_src -*** Longformer: The Long-Document Transformer +** Longformer: The Long-Document Transformer #+begin_src bibtex @article{https://doi.org/10.48550/arxiv.2004.05150, doi = {10.48550/ARXIV.2004.05150}, @@ -55,7 +54,7 @@ copyright = {arXiv.org perpetual, non-exclusive license} } #+end_src -*** Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context +** Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context #+begin_src bibtex @article{https://doi.org/10.48550/arxiv.1901.02860, doi = {10.48550/ARXIV.1901.02860}, @@ -73,7 +72,7 @@ International} } #+end_src -*** BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding +** BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding #+begin_src bibtex @inproceedings{devlin-etal-2019-bert, title = "{BERT}: Pre-training of Deep Bidirectional Transformers for @@ -112,7 +111,7 @@ #+end_src A masked language model (MLM) randomly masks some of the tokens from the input, and the objective is to predict the original input based only on its context. 
-*** Fast Transformers with Clustered Attention +** Fast Transformers with Clustered Attention #+begin_src bibtex @article{https://doi.org/10.48550/arxiv.2007.04825, doi = {10.48550/ARXIV.2007.04825}, @@ -128,7 +127,7 @@ A masked language model (MLM) randomly masks some of the tokens from the input, copyright = {arXiv.org perpetual, non-exclusive license} } #+end_src -*** The elephant in the interpretability room: Why use attention as explanation when we have saliency methods? +** The elephant in the interpretability room: Why use attention as explanation when we have saliency methods? #+begin_src bibtex @inproceedings{bastings-filippova-2020-elephant, title = "The elephant in the interpretability room: Why use @@ -160,7 +159,59 @@ A masked language model (MLM) randomly masks some of the tokens from the input, state the goal and user for their explanations.", } #+end_src +** MultiMAE: Multi-modal Multi-task Masked Autoencoders +#+begin_src bibtex +@article{https://doi.org/10.48550/arxiv.2204.01678, + doi = {10.48550/ARXIV.2204.01678}, + url = {https://arxiv.org/abs/2204.01678}, + author = {Bachmann, Roman and Mizrahi, David and Atanov, Andrei and + Zamir, Amir}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), Machine + Learning (cs.LG), FOS: Computer and information sciences, FOS: + Computer and information sciences}, + title = {MultiMAE: Multi-modal Multi-task Masked Autoencoders}, + publisher = {arXiv}, + year = 2022, + copyright = {arXiv.org perpetual, non-exclusive license} +} +#+end_src * Deep Learning + Biology +** CpG Transformer for imputation of single-cell methylomes +#+begin_src bibtex +@article{10.1093/bioinformatics/btab746, + author = {De Waele, Gaetan and Clauwaert, Jim and Menschaert, Gerben + and Waegeman, Willem}, + title = "{CpG Transformer for imputation of single-cell methylomes}", + journal = {Bioinformatics}, + volume = 38, + number = 3, + pages = {597--603}, + year = 2021, + month = 10, + abstract = "{The adoption of current
single-cell DNA methylation + sequencing protocols is hindered by incomplete coverage, + outlining the need for effective imputation techniques. The + task of imputing single-cell (methylation) data requires + models to build an understanding of underlying biological + processes. We adapt the transformer neural network architecture + to operate on methylation matrices through combining axial + attention with sliding window self-attention. The obtained CpG + Transformer displays state-of-the-art performances on a wide + range of scBS-seq and scRRBS-seq datasets. Furthermore, we + demonstrate the interpretability of CpG Transformer and + illustrate its rapid transfer learning properties, allowing + practitioners to train models on new datasets with a limited + computational and time budget. CpG Transformer is freely + available at + https://github.com/gdewael/cpg-transformer. Supplementary data + are available at Bioinformatics online.}", + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btab746}, + url = {https://doi.org/10.1093/bioinformatics/btab746}, + eprint = + {https://academic.oup.com/bioinformatics/article-pdf/38/3/597/42167564/btab746.pdf}, +} +#+end_src ** MSA Transformer #+begin_src bibtex @article {Rao2021.02.12.430858, @@ -255,4 +306,140 @@ A masked language model (MLM) randomly masks some of the tokens from the input, url = {https://doi.org/10.1038/s41586-021-03819-2} } #+end_src +** MultiVI: deep generative model for the integration of multi-modal data +#+begin_src bibtex +@article {Ashuach2021.08.20.457057, + author = {Ashuach, Tal and Gabitto, Mariano I. and Jordan, Michael I.
+ and Yosef, Nir}, + title = {MultiVI: deep generative model for the integration of + multi-modal data}, + elocation-id = {2021.08.20.457057}, + year = 2021, + doi = {10.1101/2021.08.20.457057}, + publisher = {Cold Spring Harbor Laboratory}, + abstract = {Jointly profiling the transcriptional and chromatin + accessibility landscapes of single-cells is a powerful + technique to characterize cellular populations. Here we + present MultiVI, a probabilistic model to analyze such + multiomic data and integrate it with single modality datasets. + MultiVI creates a joint representation that accurately + reflects both chromatin and transcriptional properties of the + cells even when one modality is missing. It also imputes + missing data, corrects for batch effects and is available in + the scvi-tools framework: + https://docs.scvi-tools.org/. Competing Interest Statement: The + authors have declared no competing interest.}, + URL = + {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057}, + eprint = + {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057.full.pdf}, + journal = {bioRxiv} +} +#+end_src * Biology +** Cobolt: integrative analysis of multimodal single-cell sequencing data +#+begin_src bibtex +@article{Gong2021, + author = {Gong, Boying and Zhou, Yun and Purdom, Elizabeth}, + title = {Cobolt: integrative analysis of multimodal single-cell + sequencing data}, + journal = {Genome Biology}, + year = 2021, + month = {Dec}, + day = 28, + volume = 22, + number = 1, + pages = 351, + abstract = {A growing number of single-cell sequencing platforms enable + joint profiling of multiple omics from the same cells. We + present Cobolt, a novel method that not only allows for + analyzing the data from joint-modality platforms, but provides + a coherent framework for the integration of multiple datasets + measured on different modalities.
We demonstrate its + performance on multi-modality data of gene expression and + chromatin accessibility and illustrate the integration + abilities of Cobolt by jointly analyzing this multi-modality + data with single-cell RNA-seq and ATAC-seq datasets.}, + issn = {1474-760X}, + doi = {10.1186/s13059-021-02556-z}, + url = {https://doi.org/10.1186/s13059-021-02556-z} +} +#+end_src +** MUON: multimodal omics analysis framework +#+begin_src bibtex +@article{Bredikhin2022, + author = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver}, + title = {MUON: multimodal omics analysis framework}, + journal = {Genome Biology}, + year = 2022, + month = {Feb}, + day = 01, + volume = 23, + number = 1, + pages = 42, + abstract = {Advances in multi-omics have led to an explosion of + multimodal datasets to address questions from basic biology to + translation. While these data provide novel opportunities for + discovery, they also pose management and analysis challenges, + thus motivating the development of tailored computational + solutions. Here, we present a data standard and an analysis + framework for multi-omics, MUON, designed to organise, + analyse, visualise, and exchange multimodal data. MUON stores + multimodal data in an efficient yet flexible and interoperable + data structure. MUON enables a versatile range of analyses, + from data preprocessing to flexible multi-omics alignment.}, + issn = {1474-760X}, + doi = {10.1186/s13059-021-02577-8}, + url = {https://doi.org/10.1186/s13059-021-02577-8} +} +#+end_src +** Multimodal single cell data integration challenge: Results and lessons learned +#+begin_src bibtex +@inproceedings{pmlr-v176-lance22a, + title = {Multimodal single cell data integration challenge: Results + and lessons learned}, + author = {Lance, Christopher and Luecken, Malte D. and Burkhardt, + Daniel B. 
and Cannoodt, Robrecht and Rautenstrauch, Pia and + Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and + Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh, + Nikolay and Ryazantsev, Gleb and Ohler, Uwe and {NeurIPS 2021 + Multimodal data integration competition participants} + and Pisco, Angela Oliveira and Bloom, Jonathan and + Krishnaswamy, Smita and Theis, Fabian J.}, + booktitle = {Proceedings of the NeurIPS 2021 Competitions and + Demonstrations Track}, + pages = {162--176}, + year = 2022, + editor = {Kiela, Douwe and Ciccone, Marco and Caputo, Barbara}, + volume = 176, + series = {Proceedings of Machine Learning Research}, + month = {06--14 Dec}, + publisher = {PMLR}, + pdf = {https://proceedings.mlr.press/v176/lance22a/lance22a.pdf}, + url = {https://proceedings.mlr.press/v176/lance22a.html}, + abstract = {Biology has become a data-intensive science. Recent + technological advances in single-cell genomics have enabled + the measurement of multiple facets of cellular state, + producing datasets with millions of single-cell observations. + While these data hold great promise for understanding + molecular mechanisms in health and disease, analysis + challenges arising from sparsity, technical and biological + variability, and high dimensionality of the data hinder the + derivation of such mechanistic insights. To promote the + innovation of algorithms for analysis of multimodal + single-cell data, we organized a competition at NeurIPS 2021 + applying the Common Task Framework to multimodal single-cell + data integration. For this competition we generated the first + multimodal benchmarking dataset for single-cell biology and + defined three tasks in this domain: prediction of missing + modalities, aligning modalities, and learning a joint + representation across modalities. We further specified + evaluation metrics and developed a cloud-based algorithm + evaluation pipeline.
Using this setup, 280 competitors + submitted over 2600 proposed solutions within a 3 month + period, showcasing substantial innovation especially in the + modality alignment task. Here, we present the results, + describe trends of well performing approaches, and discuss + challenges associated with running the competition.} +} +#+end_src diff --git a/docs/bibliography.bib b/docs/bibliography.bib index 3f25de6..0102fcd 100644 --- a/docs/bibliography.bib +++ b/docs/bibliography.bib @@ -134,6 +134,54 @@ state the goal and user for their explanations.", } +@article{https://doi.org/10.48550/arxiv.2204.01678, + doi = {10.48550/ARXIV.2204.01678}, + url = {https://arxiv.org/abs/2204.01678}, + author = {Bachmann, Roman and Mizrahi, David and Atanov, Andrei and + Zamir, Amir}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), Machine + Learning (cs.LG), FOS: Computer and information sciences, FOS: + Computer and information sciences}, + title = {MultiMAE: Multi-modal Multi-task Masked Autoencoders}, + publisher = {arXiv}, + year = 2022, + copyright = {arXiv.org perpetual, non-exclusive license} +} + +@article{10.1093/bioinformatics/btab746, + author = {De Waele, Gaetan and Clauwaert, Jim and Menschaert, Gerben + and Waegeman, Willem}, + title = "{CpG Transformer for imputation of single-cell methylomes}", + journal = {Bioinformatics}, + volume = 38, + number = 3, + pages = {597--603}, + year = 2021, + month = 10, + abstract = "{The adoption of current single-cell DNA methylation + sequencing protocols is hindered by incomplete coverage, + outlining the need for effective imputation techniques. The + task of imputing single-cell (methylation) data requires + models to build an understanding of underlying biological + processes. We adapt the transformer neural network architecture + to operate on methylation matrices through combining axial + attention with sliding window self-attention.
The obtained CpG + Transformer displays state-of-the-art performances on a wide + range of scBS-seq and scRRBS-seq datasets. Furthermore, we + demonstrate the interpretability of CpG Transformer and + illustrate its rapid transfer learning properties, allowing + practitioners to train models on new datasets with a limited + computational and time budget. CpG Transformer is freely + available at + https://github.com/gdewael/cpg-transformer. Supplementary data + are available at Bioinformatics online.}", + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btab746}, + url = {https://doi.org/10.1093/bioinformatics/btab746}, + eprint = + {https://academic.oup.com/bioinformatics/article-pdf/38/3/597/42167564/btab746.pdf}, +} + @article {Rao2021.02.12.430858, author = {Rao, Roshan and Liu, Jason and Verkuil, Robert and Meier, Joshua and Canny, John F. and Abbeel, Pieter and Sercu, Tom @@ -223,3 +271,131 @@ doi = {10.1038/s41586-021-03819-2}, url = {https://doi.org/10.1038/s41586-021-03819-2} } + +@article {Ashuach2021.08.20.457057, + author = {Ashuach, Tal and Gabitto, Mariano I. and Jordan, Michael I. + and Yosef, Nir}, + title = {MultiVI: deep generative model for the integration of + multi-modal data}, + elocation-id = {2021.08.20.457057}, + year = 2021, + doi = {10.1101/2021.08.20.457057}, + publisher = {Cold Spring Harbor Laboratory}, + abstract = {Jointly profiling the transcriptional and chromatin + accessibility landscapes of single-cells is a powerful + technique to characterize cellular populations. Here we + present MultiVI, a probabilistic model to analyze such + multiomic data and integrate it with single modality datasets. + MultiVI creates a joint representation that accurately + reflects both chromatin and transcriptional properties of the + cells even when one modality is missing.
It also imputes + missing data, corrects for batch effects and is available in + the scvi-tools framework: + https://docs.scvi-tools.org/. Competing Interest Statement: The + authors have declared no competing interest.}, + URL = + {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057}, + eprint = + {https://www.biorxiv.org/content/early/2021/09/07/2021.08.20.457057.full.pdf}, + journal = {bioRxiv} +} + +@article{Gong2021, + author = {Gong, Boying and Zhou, Yun and Purdom, Elizabeth}, + title = {Cobolt: integrative analysis of multimodal single-cell + sequencing data}, + journal = {Genome Biology}, + year = 2021, + month = {Dec}, + day = 28, + volume = 22, + number = 1, + pages = 351, + abstract = {A growing number of single-cell sequencing platforms enable + joint profiling of multiple omics from the same cells. We + present Cobolt, a novel method that not only allows for + analyzing the data from joint-modality platforms, but provides + a coherent framework for the integration of multiple datasets + measured on different modalities. We demonstrate its + performance on multi-modality data of gene expression and + chromatin accessibility and illustrate the integration + abilities of Cobolt by jointly analyzing this multi-modality + data with single-cell RNA-seq and ATAC-seq datasets.}, + issn = {1474-760X}, + doi = {10.1186/s13059-021-02556-z}, + url = {https://doi.org/10.1186/s13059-021-02556-z} +} + +@article{Bredikhin2022, + author = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver}, + title = {MUON: multimodal omics analysis framework}, + journal = {Genome Biology}, + year = 2022, + month = {Feb}, + day = 01, + volume = 23, + number = 1, + pages = 42, + abstract = {Advances in multi-omics have led to an explosion of + multimodal datasets to address questions from basic biology to + translation.
While these data provide novel opportunities for + discovery, they also pose management and analysis challenges, + thus motivating the development of tailored computational + solutions. Here, we present a data standard and an analysis + framework for multi-omics, MUON, designed to organise, + analyse, visualise, and exchange multimodal data. MUON stores + multimodal data in an efficient yet flexible and interoperable + data structure. MUON enables a versatile range of analyses, + from data preprocessing to flexible multi-omics alignment.}, + issn = {1474-760X}, + doi = {10.1186/s13059-021-02577-8}, + url = {https://doi.org/10.1186/s13059-021-02577-8} +} + +@inproceedings{pmlr-v176-lance22a, + title = {Multimodal single cell data integration challenge: Results + and lessons learned}, + author = {Lance, Christopher and Luecken, Malte D. and Burkhardt, + Daniel B. and Cannoodt, Robrecht and Rautenstrauch, Pia and + Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and + Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh, + Nikolay and Ryazantsev, Gleb and Ohler, Uwe and {NeurIPS 2021 + Multimodal data integration competition participants} + and Pisco, Angela Oliveira and Bloom, Jonathan and + Krishnaswamy, Smita and Theis, Fabian J.}, + booktitle = {Proceedings of the NeurIPS 2021 Competitions and + Demonstrations Track}, + pages = {162--176}, + year = 2022, + editor = {Kiela, Douwe and Ciccone, Marco and Caputo, Barbara}, + volume = 176, + series = {Proceedings of Machine Learning Research}, + month = {06--14 Dec}, + publisher = {PMLR}, + pdf = {https://proceedings.mlr.press/v176/lance22a/lance22a.pdf}, + url = {https://proceedings.mlr.press/v176/lance22a.html}, + abstract = {Biology has become a data-intensive science. Recent + technological advances in single-cell genomics have enabled + the measurement of multiple facets of cellular state, + producing datasets with millions of single-cell observations.
+ While these data hold great promise for understanding + molecular mechanisms in health and disease, analysis + challenges arising from sparsity, technical and biological + variability, and high dimensionality of the data hinder the + derivation of such mechanistic insights. To promote the + innovation of algorithms for analysis of multimodal + single-cell data, we organized a competition at NeurIPS 2021 + applying the Common Task Framework to multimodal single-cell + data integration. For this competition we generated the first + multimodal benchmarking dataset for single-cell biology and + defined three tasks in this domain: prediction of missing + modalities, aligning modalities, and learning a joint + representation across modalities. We further specified + evaluation metrics and developed a cloud-based algorithm + evaluation pipeline. Using this setup, 280 competitors + submitted over 2600 proposed solutions within a 3 month + period, showcasing substantial innovation especially in the + modality alignment task. Here, we present the results, + describe trends of well performing approaches, and discuss + challenges associated with running the competition.} +}