library(immuneSIM) library(Biostrings) generate_repertoire <- function(number_of_sequences) { return(immuneSIM( number_of_seqs = number_of_sequences, species = "hs", receptor = "tr", chain = "b" )) } amplify_rows <- function(data, column, factor) { if (column == "sequence") { dna_string <- Biostrings::DNAStringSet(data) reverse_complement <- Biostrings::reverseComplement(dna_string) return(rep(reverse_complement, factor)) } return(rep(data, factor)) } save_data <- function(data) { Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta") vdj_sequences <- data[-1] write.csv(vdj_sequences, "data/vdj_alignment.csv", row.names = FALSE) } process_data <- function(repertoire, sequencing_runs) { columns <- c( "sequence", "v_sequence_alignment", "d_sequence_alignment", "j_sequence_alignment" ) data <- repertoire[, columns] amplified_data <- mapply(data, names(data), sequencing_runs, FUN = amplify_rows ) save_data(amplified_data) } parse_cli_arguments <- function(args) { if (length(args) != 2) { stop("usage: repertoire.r ") } return(c(args[1], args[2])) } args <- commandArgs(trailingOnly = TRUE) arguments <- parse_cli_arguments(args) number_of_sequences <- as.integer(arguments[1]) sequencing_runs <- as.integer(arguments[2]) repertoire <- generate_repertoire(number_of_sequences) process_data(repertoire, sequencing_runs)