library(immuneSIM)
library(Biostrings)

#' Simulate an immune receptor repertoire.
#'
#' Calls `immuneSIM()` with species "hs", receptor "tr" and chain "b"
#' (per the immuneSIM documentation: human T-cell receptor beta chain).
#'
#' @param number_of_sequences Number of sequences to simulate.
#' @return The object returned by `immuneSIM()`; downstream code reads its
#'   `sequence` column.
generate_repertoires <- function(number_of_sequences) {
  immuneSIM(
    number_of_seqs = number_of_sequences,
    species = "hs",
    receptor = "tr",
    chain = "b",
    verbose = TRUE
  )
}

# TODO save also v_call and j_call
#' Turn a simulated repertoire into a set of reads.
#'
#' The full vector of sequences is repeated `sequencing_runs` times, each read
#' is named by its running index, and the reverse complement of every read is
#' returned.
#'
#' @param repertoire Object with a `sequence` column holding DNA sequences.
#' @param sequencing_runs Number of times the whole set of sequences is
#'   repeated.
#' @return A `DNAStringSet` holding the reverse-complemented reads.
preprocess_data <- function(repertoire, sequencing_runs) {
  sequences <- as.character(repertoire$sequence)
  reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs))
  names(reads) <- seq_len(length(reads))
  Biostrings::reverseComplement(reads)
}

#' Write the reads to `data/sequence.fastq` in FASTQ format.
#'
#' No explicit quality scores are supplied to `writeXStringSet()`.
#'
#' @param repertoire An `XStringSet` (here: the reads from
#'   `preprocess_data()`).
#' @return Whatever `Biostrings::writeXStringSet()` returns.
save_data <- function(repertoire) {
  file_name <- "data/sequence.fastq"
  # TODO Change format to fasta
  # The output directory is not guaranteed to exist on a fresh checkout;
  # create it so the write below does not fail on a missing path.
  dir.create(dirname(file_name), recursive = TRUE, showWarnings = FALSE)
  Biostrings::writeXStringSet(repertoire, file_name, format = "fastq")
}

#' Check that exactly two command-line arguments were supplied.
#'
#' @param args Character vector of trailing command-line arguments.
#' @return Character vector of length two:
#'   `c(number_of_sequences, sequencing_runs)`, still as strings.
parse_cli_arguments <- function(args) {
  if (length(args) != 2) {
    stop(
      "usage: repertoire.r <number_of_sequences> <sequencing_runs>",
      call. = FALSE
    )
  }
  c(args[1], args[2])
}

#' Convert one command-line value to an integer count, failing loudly.
#'
#' @param value Single string taken from the command line.
#' @param name Argument name used in the error message.
#' @param minimum Smallest accepted value (integer).
#' @return `value` as an integer.
parse_count <- function(value, name, minimum) {
  # as.integer() only warns and yields NA on unparseable input; silence that
  # warning here because the NA is turned into an explicit error just below.
  count <- suppressWarnings(as.integer(value))
  if (is.na(count) || count < minimum) {
    stop(
      sprintf("%s must be an integer >= %d, got '%s'", name, minimum, value),
      call. = FALSE
    )
  }
  count
}

args <- commandArgs(trailingOnly = TRUE)
parameters <- parse_cli_arguments(args)
# At least one sequence must be simulated; zero sequencing runs is still
# accepted and simply yields an empty read set.
number_of_sequences <- parse_count(parameters[1], "number_of_sequences", 1L)
sequencing_runs <- parse_count(parameters[2], "sequencing_runs", 0L)

repertoire <- generate_repertoires(number_of_sequences)
processed_data <- preprocess_data(repertoire, sequencing_runs)
save_data(processed_data)