# locigenesis/src/repertoire.r (42 lines, 1.2 KiB, R)

library(immuneSIM)
library(Biostrings)
#' Simulate an immune repertoire with immuneSIM.
#'
#' Calls `immuneSIM()` with fixed settings `species = "hs"`,
#' `receptor = "tr"` and `chain = "b"` (presumably human T-cell receptor
#' beta chain; see the immuneSIM documentation for the accepted codes).
#'
#' @param number_of_sequences Number of sequences to simulate, forwarded as
#'   `number_of_seqs`.
#' @return Whatever `immuneSIM()` returns for these settings.
generate_repertoire <- function(number_of_sequences) {
  simulated_repertoire <- immuneSIM(
    number_of_seqs = number_of_sequences,
    species = "hs",
    receptor = "tr",
    chain = "b"
  )
  simulated_repertoire
}
#' Write the sequences and junctions of a repertoire to FASTQ files.
#'
#' Both files are written with `Biostrings::writeXStringSet()` to paths that
#' are relative to the current working directory.
#'
#' @param data An object with `sequence` and `junction` elements that
#'   `writeXStringSet()` can write (presumably the XStringSet columns built by
#'   `process_data()`).
#' @return The value of the last `writeXStringSet()` call.
save_data <- function(data) {
  sequence_path <- "data/sequence.fastq"
  junction_path <- "data/HVR.fastq"

  Biostrings::writeXStringSet(data$sequence, sequence_path, format = "fastq")
  Biostrings::writeXStringSet(data$junction, junction_path, format = "fastq")
}
#' Prepare a simulated repertoire for export and replicate its rows.
#'
#' Replaces `data$sequence` with the reverse complement of the original
#' sequences (as a named DNAStringSet), converts `data$junction` to a named
#' DNAStringSet, and then repeats the rows of `data`.
#'
#' @param data A data frame with `sequence`, `junction`, `v_call` and `j_call`
#'   columns.
#' @param reads Number of times the rows are repeated; passed to `rep()` as
#'   its `times` argument. Values that arrive as character strings (e.g. from
#'   the command line) are converted to integer.
#' @return `data` with the converted columns and its rows repeated.
process_data <- function(data, reads) {
  # Coerce explicitly instead of relying on whatever rep() does with a string,
  # and fail with a clear message when the value is not a usable count.
  read_counts <- suppressWarnings(as.integer(reads))
  invalid_reads <- length(read_counts) == 0L ||
    anyNA(read_counts) ||
    any(read_counts < 0L)
  if (invalid_reads) {
    stop("'reads' must be a non-negative whole number", call. = FALSE)
  }

  dna_sequence <- Biostrings::DNAStringSet(data$sequence)
  data$sequence <- Biostrings::reverseComplement(dna_sequence)
  # NOTE(review): the trailing " " is pasted as a fourth element, so every
  # name ends in two spaces; it may have been meant as `sep = " "`. Kept as-is
  # because the FASTQ headers would change otherwise -- confirm with consumers.
  names(data$sequence) <- paste(rownames(data), data$v_call, data$j_call, " ")
  data$junction <- Biostrings::DNAStringSet(data$junction)
  names(data$junction) <- rownames(data)

  data[rep(seq_len(nrow(data)), read_counts), ]
}
#' Read the script's trailing command-line arguments.
#'
#' @return A character vector of length two: the number of sequences and the
#'   number of reads, exactly as given on the command line.
#' Stops with a usage message unless exactly two arguments were supplied.
parse_cli_arguments <- function() {
  cli_arguments <- commandArgs(trailingOnly = TRUE)
  if (length(cli_arguments) == 2) {
    return(cli_arguments)
  }
  stop("usage: repertoire.r <number of sequences> <number of reads>")
}
# Script entry point: simulate a repertoire, replicate its rows and write the
# FASTQ files. Command-line arguments arrive as character strings, so both are
# converted to integers here and rejected early when they are not valid counts
# (previously the read count was forwarded as a raw string and a non-numeric
# sequence count became NA before reaching the simulator).
args <- parse_cli_arguments()
number_of_sequences <- suppressWarnings(as.integer(args[1]))
number_of_reads <- suppressWarnings(as.integer(args[2]))
if (is.na(number_of_sequences) || number_of_sequences < 0L) {
  stop("<number of sequences> must be a non-negative integer", call. = FALSE)
}
if (is.na(number_of_reads) || number_of_reads < 0L) {
  stop("<number of reads> must be a non-negative integer", call. = FALSE)
}

repertoire <- generate_repertoire(number_of_sequences = number_of_sequences)
data <- process_data(data = repertoire, reads = number_of_reads)
save_data(data)