library(immuneSIM) library(Biostrings) generate_repertoire <- function(number_of_sequences) { return(immuneSIM( number_of_seqs = number_of_sequences, species = "hs", receptor = "tr", chain = "b" )) } amplify_rows <- function(data, column, factor) { if (column == "sequence") { dna_string <- Biostrings::DNAStringSet(data) reverse_complement <- Biostrings::reverseComplement(dna_string) return(rep(reverse_complement, factor)) } return(rep(data, factor)) } save_data <- function(data, name) { if (name == "sequence") { file_name <- paste("data/", name, ".fasta", sep = "") Biostrings::writeXStringSet(data, file_name, format = "fasta") } else { file_name <- paste("data/", name, ".txt", sep = "") cat(data, file = file_name, sep = "\n") } } process_data <- function(repertoire, sequencing_runs) { columns <- c("sequence", "v_call", "j_call") data <- repertoire[, columns] amplified_data <- mapply(data, names(data), sequencing_runs, FUN = amplify_rows) invisible(mapply(amplified_data, names(amplified_data), FUN = save_data)) } parse_cli_arguments <- function(args) { if (length(args) != 2) { stop("usage: repertoire.r ") } return(c(args[1], args[2])) } args <- commandArgs(trailingOnly = TRUE) arguments <- parse_cli_arguments(args) number_of_sequences <- as.integer(arguments[1]) sequencing_runs <- as.integer(arguments[2]) repertoire <- generate_repertoire(number_of_sequences) process_data(repertoire, sequencing_runs)