# Patch summary (preamble text placed ahead of the first "diff --git" header).
#
# shell.nix:
#   * adds rPackages.Biostrings to the mkShell buildInputs list.
#
# src/repertoire.r:
#   * adds library(Biostrings) next to the existing library(immuneSIM).
#   * adds process_chain(repertoire): reads repertoire$sequence as character
#     and repertoire$counts as integer, repeats each sequence by its count
#     with rep(), wraps the result in a Biostrings::DNAStringSet, names the
#     reads 1..length(reads), and returns their reverse complement.
#   * adds preprocess_data(repertoires): applies process_chain to every
#     element with lapply(), re-assigns the input names to the result, and
#     returns that list.
#   * adds a final script statement storing preprocess_data(sim_repertoire)
#     in processed_data.
#
# NOTE(review): the original line breaks of this patch appear to have been
#   collapsed into a single line; the hunk headers (e.g. "-18,6 +19,21")
#   describe multi-line hunks, so the newlines presumably need to be
#   restored before the patch can be applied -- TODO confirm.
# NOTE(review): rep(sequences, counts) presumably fails if counts contains
#   NA or negative values -- confirm the counts column is always valid.
# NOTE(review): lapply() keeps the names of its input, so the names<- line
#   in preprocess_data looks redundant -- verify before removing.
# NOTE(review): "Develoment" in the shell.nix hunk is a context line copied
#   from the target file; correcting it here would presumably stop the hunk
#   from matching, so fix the typo in shell.nix itself instead.
diff --git a/shell.nix b/shell.nix index 82b696a..94c795b 100644 --- a/shell.nix +++ b/shell.nix @@ -6,6 +6,7 @@ mkShell { buildInputs = [ R rPackages.immuneSIM + rPackages.Biostrings jdk # Develoment tools rPackages.languageserver diff --git a/src/repertoire.r b/src/repertoire.r index 1a5cefc..4ed9b55 100644 --- a/src/repertoire.r +++ b/src/repertoire.r @@ -1,4 +1,5 @@ library(immuneSIM) +library(Biostrings) generate_repertoires <- function(number_of_sequences) { a_chain <- immuneSIM( @@ -18,6 +19,21 @@ generate_repertoires <- function(number_of_sequences) { return(list("a_chain" = a_chain, "b_chain" = b_chain)) } +process_chain <- function(repertoire) { + sequences <- as.character(repertoire$sequence) + counts <- as.integer(repertoire$counts) + reads <- Biostrings::DNAStringSet(rep(sequences, counts)) + names(reads) <- seq_len(length(reads)) + reverse_complement <- Biostrings::reverseComplement(reads) + return(reverse_complement) +} + +preprocess_data <- function(repertoires) { + filtered_repertoires <- lapply(repertoires, process_chain) + names(filtered_repertoires) <- names(repertoires) + return(filtered_repertoires) +} + parse_cli_arguments <- function(args) { if (length(args) != 1) { @@ -29,3 +45,4 @@ parse_cli_arguments <- function(args) { args <- commandArgs(trailingOnly = TRUE) number_of_sequences <- parse_cli_arguments(args) sim_repertoire <- generate_repertoires(number_of_sequences) +processed_data <- preprocess_data(sim_repertoire)