library(immuneSIM)
library(Biostrings)

#' Simulate an immune receptor repertoire with immuneSIM.
#'
#' Calls `immuneSIM()` with species "hs", receptor "tr" and chain "b"
#' (immuneSIM's codes for a human T-cell receptor beta chain) and verbose
#' progress output enabled.
#'
#' @param number_of_sequences Number of sequences to simulate; forwarded to
#'   the `number_of_seqs` argument of `immuneSIM()`.
#' @return The object returned by `immuneSIM()`. Downstream code reads its
#'   `sequence` column.
generate_repertoire <- function(number_of_sequences) {
  immuneSIM(
    number_of_seqs = number_of_sequences,
    species = "hs",
    receptor = "tr",
    chain = "b",
    verbose = TRUE
  )
}

# TODO: also save v_call and j_call

#' Turn a simulated repertoire into reverse-complemented reads.
#'
#' The full vector of sequences is repeated `sequencing_runs` times, each
#' resulting read is named by its 1-based position, and the whole set is
#' reverse-complemented.
#'
#' @param repertoire Object with a `sequence` column (as produced by
#'   `generate_repertoire()`).
#' @param sequencing_runs Number of times the whole sequence vector is
#'   repeated.
#' @return A `Biostrings::DNAStringSet` holding the reverse complement of
#'   every read.
preprocess_data <- function(repertoire, sequencing_runs) {
  sequences <- as.character(repertoire$sequence)
  reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs))
  names(reads) <- seq_len(length(reads))
  Biostrings::reverseComplement(reads)
}

#' Write a set of sequences to "data/sequence.fasta" in FASTA format.
#'
#' @param repertoire An XStringSet (e.g. the result of `preprocess_data()`).
#' @return The value of `Biostrings::writeXStringSet()`; called for its side
#'   effect of writing the file.
save_data <- function(repertoire) {
  file_name <- "data/sequence.fasta"
  # The writer does not create missing directories, so make sure the target
  # directory exists before writing.
  dir.create(dirname(file_name), recursive = TRUE, showWarnings = FALSE)
  Biostrings::writeXStringSet(repertoire, file_name, format = "fasta")
}

#' Check that exactly two command-line arguments were supplied.
#'
#' @param args Character vector of trailing command-line arguments.
#' @return A length-2 character vector: the raw (unconverted) values for
#'   number_of_sequences and sequencing_runs, in that order.
parse_cli_arguments <- function(args) {
  if (length(args) != 2) {
    stop(
      "usage: repertoire.r <number_of_sequences> <sequencing_runs>",
      call. = FALSE
    )
  }
  c(args[1], args[2])
}

# Script entry point ----

args <- commandArgs(trailingOnly = TRUE)
arguments <- parse_cli_arguments(args)
number_of_sequences <- as.integer(arguments[1])
sequencing_runs <- as.integer(arguments[2])

# as.integer() yields NA for non-numeric input; fail here with a clear
# message instead of passing NA (or a non-positive count) further down.
if (anyNA(c(number_of_sequences, sequencing_runs)) ||
  number_of_sequences < 1L || sequencing_runs < 1L) {
  stop(
    "number_of_sequences and sequencing_runs must be positive integers",
    call. = FALSE
  )
}

repertoire <- generate_repertoire(number_of_sequences)
processed_data <- preprocess_data(repertoire, sequencing_runs)
save_data(processed_data)