2021-02-21 20:59:39 +01:00
|
|
|
library(immuneSIM)
|
2021-02-26 02:19:40 +01:00
|
|
|
library(Biostrings)
|
2021-02-21 20:59:39 +01:00
|
|
|
|
2021-03-02 20:08:14 +01:00
|
|
|
#' Simulate an immune repertoire with immuneSIM.
#'
#' Thin wrapper that fixes the simulation settings and only exposes the
#' repertoire size.
#'
#' @param number_of_sequences Forwarded to immuneSIM() as `number_of_seqs`.
#' @return Whatever immuneSIM() returns, unchanged.
generate_repertoire <- function(number_of_sequences) {
  # NOTE(review): "hs" / "tr" / "b" are immuneSIM option codes; presumably
  # human, T-cell receptor, beta chain -- confirm against the immuneSIM docs.
  simulated_repertoire <- immuneSIM(
    chain = "b",
    receptor = "tr",
    species = "hs",
    number_of_seqs = number_of_sequences
  )
  simulated_repertoire
}
|
2021-02-21 20:59:39 +01:00
|
|
|
|
2021-03-11 21:03:16 +01:00
|
|
|
#' Repeat the values of one repertoire column.
#'
#' For the "sequence" column the values are converted to a DNAStringSet,
#' reverse-complemented and then repeated; every other column is repeated
#' as-is and returned as a character vector.
#'
#' @param data Values of a single column.
#' @param column Name of the column `data` came from.
#' @param factor Repetition count, passed positionally to rep().
#' @return A Biostrings DNAStringSet when `column` is "sequence", otherwise
#'   a character vector.
amplify_rows <- function(data, column, factor) {
  # Non-sequence columns need no DNA handling: repeat and coerce to text.
  if (column != "sequence") {
    return(as.character(rep(data, factor)))
  }

  forward_strand <- Biostrings::DNAStringSet(data)
  flipped_strand <- Biostrings::reverseComplement(forward_strand)
  repeated_strand <- rep(flipped_strand, factor)
  Biostrings::DNAStringSet(repeated_strand)
}
|
|
|
|
|
|
|
|
#' Write one column's data to a file under the "data" directory.
#'
#' The "sequence" column is written as FASTA via Biostrings to
#' "data/sequence.fasta"; any other column is written to "data/<name>.txt"
#' with one value per line.
#'
#' @param data Values to write. For `name == "sequence"` this is handed to
#'   Biostrings::writeXStringSet(); otherwise to cat().
#' @param name Column name; selects the output format and the file name.
#' @return The value of the underlying write call.
save_data <- function(data, name) {
  output_dir <- "data"

  # Neither cat() nor writeXStringSet() creates missing directories, so make
  # sure the target exists instead of failing on a fresh checkout.
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  if (name == "sequence") {
    file_name <- file.path(output_dir, paste0(name, ".fasta"))
    Biostrings::writeXStringSet(data, file_name, format = "fasta")
  } else {
    file_name <- file.path(output_dir, paste0(name, ".txt"))
    cat(data, file = file_name, sep = "\n")
  }
}
|
|
|
|
|
2021-03-11 21:03:16 +01:00
|
|
|
#' Amplify selected repertoire columns and save each one to disk.
#'
#' Picks the "sequence", "v_call" and "j_call" columns, runs amplify_rows()
#' on each column, then passes every amplified column together with its
#' name to save_data().
#'
#' @param repertoire Table-like object that supports `[, columns]` and has
#'   the three columns named above.
#' @param sequencing_runs Repetition factor forwarded to amplify_rows().
#' @return Invisibly, the list of save_data() results, named by column.
process_data <- function(repertoire, sequencing_runs) {
  columns <- c("sequence", "v_call", "j_call")
  data <- repertoire[, columns]

  # Map() never simplifies, so the result is always a list named by column.
  # mapply()'s default SIMPLIFY = TRUE collapses equal-length plain vectors
  # into a matrix, which has no names() for the save step to iterate over.
  amplified_data <- Map(amplify_rows, data, names(data), sequencing_runs)

  invisible(Map(save_data, amplified_data, names(amplified_data)))
}
|
2021-02-21 20:59:39 +01:00
|
|
|
|
2021-02-25 20:02:11 +01:00
|
|
|
#' Check the command-line arguments and return the two expected values.
#'
#' @param args Vector of trailing command-line arguments.
#' @return The first and second element of `args`, combined with c().
#' Stops with a usage message unless exactly two arguments are given.
parse_cli_arguments <- function(args) {
  has_expected_count <- length(args) == 2
  if (has_expected_count) {
    sequence_count_arg <- args[1]
    sequencing_runs_arg <- args[2]
    return(c(sequence_count_arg, sequencing_runs_arg))
  }
  stop("usage: repertoire.r <number of sequences> <sequencing_runs>")
}
|
|
|
|
|
|
|
|
# Script entry point: read the two command-line arguments, simulate a
# repertoire of the requested size and write the amplified columns to disk.
args <- commandArgs(trailingOnly = TRUE)
arguments <- parse_cli_arguments(args)

# as.integer() turns non-numeric text into NA (with a coercion warning).
# Silence that warning and report the problem below with a clear message
# instead of handing NA on to the simulation and amplification steps.
number_of_sequences <- suppressWarnings(as.integer(arguments[1]))
sequencing_runs <- suppressWarnings(as.integer(arguments[2]))

if (is.na(number_of_sequences) || number_of_sequences < 0L ||
      is.na(sequencing_runs) || sequencing_runs < 0L) {
  stop(
    "<number of sequences> and <sequencing_runs> must be ",
    "non-negative integers",
    call. = FALSE
  )
}

repertoire <- generate_repertoire(number_of_sequences)
process_data(repertoire, sequencing_runs)
|