# Simulate an immune receptor repertoire with immuneSIM and write it to disk
# as a FASTA file of reverse-complemented sequences plus a CSV of the
# V/D/J alignment columns, repeated once per requested read.
#
# Usage: Rscript repertoire.r <number_of_sequences> <reads>

library(immuneSIM)
library(Biostrings)

#' Simulate a repertoire with immuneSIM.
#'
#' Calls `immuneSIM()` with species "hs", receptor "tr" and chain "b"
#' (immuneSIM's codes for human T-cell receptor beta chain).
#'
#' @param number_of_sequences Number of sequences to simulate.
#' @return The value returned by `immuneSIM()`.
generate_repertoire <- function(number_of_sequences) {
  immuneSIM(
    number_of_seqs = number_of_sequences,
    species = "hs",
    receptor = "tr",
    chain = "b"
  )
}

#' Write the sequences and the amplified alignment table to `data/`.
#'
#' Writes `data$sequence` to "data/sequence.fasta" and every other column,
#' with the full set of rows repeated `reads` times, to
#' "data/vdj_alignment.csv".
#'
#' @param data Data frame with a `sequence` column holding an XStringSet and
#'   the alignment/call columns to export.
#' @param reads Number of times the whole table is repeated in the CSV.
#' @return `NULL`, invisibly; called for its side effects.
save_data <- function(data, reads) {
  fasta_path <- "data/sequence.fasta"
  csv_path <- "data/vdj_alignment.csv"

  # Both writers fail when the target directory is missing, so create it.
  dir.create(dirname(fasta_path), showWarnings = FALSE, recursive = TRUE)

  # NOTE(review): the sequences carry no names here, so the FASTA headers
  # depend on how writeXStringSet handles an unnamed set -- confirm that
  # downstream tools accept the result.
  Biostrings::writeXStringSet(data$sequence, fasta_path)

  # Drop the sequence column by name rather than by position so the export
  # does not depend on the column order chosen by the caller.
  vdj_sequences <- data[setdiff(names(data), "sequence")]
  row_index <- rep(seq_len(nrow(vdj_sequences)), reads)
  amplified_vdj <- vdj_sequences[row_index, ]
  write.csv(amplified_vdj, csv_path, row.names = FALSE)

  invisible(NULL)
}

#' Select the exported columns, reverse-complement the sequences and save.
#'
#' @param repertoire Data frame produced by `generate_repertoire()`.
#' @param reads Number of times each alignment row set is repeated on export.
#' @return `NULL`, invisibly; called for its side effects.
process_data <- function(repertoire, reads) {
  columns <- c(
    "sequence",
    "v_sequence_alignment",
    "d_sequence_alignment",
    "j_sequence_alignment",
    "v_call",
    "j_call"
  )

  missing_columns <- setdiff(columns, names(repertoire))
  if (length(missing_columns) > 0) {
    stop(
      "repertoire is missing column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  data <- repertoire[, columns]
  dna_sequence <- Biostrings::DNAStringSet(data$sequence)
  data$sequence <- Biostrings::reverseComplement(dna_sequence)
  save_data(data, reads)
}

#' Read and validate the two command-line arguments.
#'
#' @return Character vector of length 2: the number of sequences and the
#'   number of reads, exactly as given on the command line.
parse_cli_arguments <- function() {
  usage <- "usage: repertoire.r <number_of_sequences> <reads>"
  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) != 2) {
    stop(usage, call. = FALSE)
  }

  # as.integer() turns unparseable text into NA with a warning; check for
  # that here so a typo fails immediately instead of after the simulation.
  counts <- suppressWarnings(as.integer(args))
  if (anyNA(counts) || any(counts < 0L)) {
    stop(
      "both arguments must be non-negative integers\n", usage,
      call. = FALSE
    )
  }

  args
}

args <- parse_cli_arguments()
repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
process_data(repertoire = repertoire, reads = as.integer(args[2]))