library(immuneSIM)
library(Biostrings)

#' Simulate a repertoire with immuneSIM.
#'
#' Calls `immuneSIM()` with species "hs", receptor "tr" and chain "b"
#' (per the immuneSIM documentation: human T-cell receptor beta chain).
#'
#' @param number_of_sequences Number of sequences to simulate.
#' @return The object returned by `immuneSIM()` (a single repertoire; the
#'   rest of this script reads its `sequence` and `counts` columns).
generate_repertoires <- function(number_of_sequences) {
  immuneSIM(
    number_of_seqs = number_of_sequences,
    species = "hs",
    receptor = "tr",
    chain = "b",
    verbose = TRUE
  )
}

#' Expand one repertoire into individual reads.
#'
#' Every sequence is repeated `counts` times, the reads are named by their
#' running index, and the reverse complement of each read is returned.
#'
#' @param repertoire An object with `sequence` and `counts` elements of
#'   equal length (e.g. one simulated repertoire).
#' @return A `DNAStringSet` holding the reverse-complemented reads.
process_chain <- function(repertoire) {
  sequences <- as.character(repertoire$sequence)
  counts <- as.integer(repertoire$counts)
  reads <- Biostrings::DNAStringSet(rep(sequences, counts))
  names(reads) <- as.character(seq_len(length(reads)))
  Biostrings::reverseComplement(reads)
}

#' Apply `process_chain()` to every repertoire in a list.
#'
#' @param repertoires A (named) list of repertoires.
#' @return A list of `DNAStringSet` objects carrying the names of
#'   `repertoires` (`lapply()` preserves them).
preprocess_data <- function(repertoires) {
  lapply(repertoires, process_chain)
}

#' Write each set of reads to `data/<name>.fastq`.
#'
#' @param repertoires A named list of `XStringSet` objects; each name is
#'   used as the stem of the output file.
#' @return `repertoires`, invisibly.
save_data <- function(repertoires) {
  output_dir <- "data"
  # The output directory may not exist yet (e.g. on a fresh checkout);
  # writing into a missing directory would fail.
  dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
  for (chain in names(repertoires)) {
    file_name <- file.path(output_dir, paste0(chain, ".fastq"))
    Biostrings::writeXStringSet(
      repertoires[[chain]],
      file_name,
      format = "fastq"
    )
  }
  invisible(repertoires)
}

#' Parse the command-line arguments of the script.
#'
#' @param args Character vector of trailing command-line arguments.
#' @return The requested number of sequences as a single integer.
#' Stops with an error unless exactly one argument is given and it parses
#' to a positive integer.
parse_cli_arguments <- function(args) {
  if (length(args) != 1) {
    stop("usage: repertoire.r <number_of_sequences>", call. = FALSE)
  }
  # as.integer() turns non-numeric text into NA with a warning; report that
  # as a clear error here instead of passing NA on to the simulator.
  number_of_sequences <- suppressWarnings(as.integer(args[1]))
  if (is.na(number_of_sequences) || number_of_sequences < 1L) {
    stop(
      "number_of_sequences must be a positive integer, got: ", args[1],
      call. = FALSE
    )
  }
  number_of_sequences
}

args <- commandArgs(trailingOnly = TRUE)
number_of_sequences <- parse_cli_arguments(args)
sim_repertoire <- generate_repertoires(number_of_sequences)
# generate_repertoires() returns one repertoire, while preprocess_data()
# and save_data() expect a named list of repertoires. Passing the bare
# repertoire would make lapply() iterate over its columns, so wrap it; the
# list name becomes the stem of the output file.
processed_data <- preprocess_data(list(b_chain = sim_repertoire))
save_data(processed_data)