library(immuneSIM)
library(Biostrings)

#' Simulate human T-cell receptor alpha- and beta-chain repertoires.
#'
#' Calls `immuneSIM()` once per chain ("a" first, then "b") with
#' `species = "hs"`, `receptor = "tr"` and `verbose = TRUE`.
#'
#' @param number_of_sequences Number of sequences to simulate per chain;
#'   forwarded to `immuneSIM()` as `number_of_seqs`.
#' @return A named list with elements `a_chain` and `b_chain`, each holding
#'   the value returned by `immuneSIM()` for that chain.
generate_repertoires <- function(number_of_sequences) {
  # The names of this vector become the names of the returned list.
  chains <- c(a_chain = "a", b_chain = "b")
  lapply(chains, function(chain) {
    immuneSIM(
      number_of_seqs = number_of_sequences,
      species = "hs",
      receptor = "tr",
      chain = chain,
      verbose = TRUE
    )
  })
}

#' Expand one repertoire into reverse-complemented reads.
#'
#' Each entry of the `sequence` column is repeated as many times as the
#' matching entry of the `counts` column, the resulting reads are named by
#' their running index, and the reverse complement of every read is returned.
#'
#' @param repertoire A data frame (or list) with a `sequence` column of DNA
#'   strings and a `counts` column of per-sequence repeat counts.
#' @return A `Biostrings::DNAStringSet` holding the reverse complements of
#'   the expanded reads.
process_chain <- function(repertoire) {
  # Fail loudly on a missing column: otherwise a missing `sequence` column
  # silently yields an empty read set.
  missing_columns <- setdiff(c("sequence", "counts"), names(repertoire))
  if (length(missing_columns) > 0) {
    stop(
      "repertoire is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  sequences <- as.character(repertoire[["sequence"]])
  counts <- as.integer(repertoire[["counts"]])

  # One read per counted occurrence of each sequence.
  reads <- Biostrings::DNAStringSet(rep(sequences, counts))
  names(reads) <- seq_along(reads)

  Biostrings::reverseComplement(reads)
}

#' Apply `process_chain()` to every repertoire in a list.
#'
#' @param repertoires A list of repertoires, e.g. the value returned by
#'   `generate_repertoires()`.
#' @return A list of the same length and with the same names as
#'   `repertoires`, holding the `process_chain()` result for each element.
preprocess_data <- function(repertoires) {
  # lapply() already carries the input names over to its result.
  lapply(repertoires, process_chain)
}

#' Parse the command-line arguments of the script.
#'
#' @param args Character vector of trailing command-line arguments.
#' @return The requested number of sequences as a single positive integer.
#'   Signals an error when `args` does not hold exactly one element or when
#'   that element cannot be read as a positive integer.
parse_cli_arguments <- function(args) {
  if (length(args) != 1) {
    stop("usage: repertoire.r <number_of_sequences>", call. = FALSE)
  }

  # as.integer() only warns and returns NA on unparsable input; the warning
  # is silenced here because the NA is turned into an explicit error below.
  number_of_sequences <- suppressWarnings(as.integer(args[1]))
  if (is.na(number_of_sequences) || number_of_sequences < 1L) {
    stop(
      "number_of_sequences must be a positive integer, got: ", args[1],
      call. = FALSE
    )
  }

  number_of_sequences
}

# Script entry point ----
args <- commandArgs(trailingOnly = TRUE)
number_of_sequences <- parse_cli_arguments(args)
sim_repertoire <- generate_repertoires(number_of_sequences)
processed_data <- preprocess_data(sim_repertoire)