2021-02-21 20:59:39 +01:00
|
|
|
library(immuneSIM)
|
2021-02-26 02:19:40 +01:00
|
|
|
library(Biostrings)
|
2021-02-21 20:59:39 +01:00
|
|
|
|
2021-03-02 20:08:14 +01:00
|
|
|
#' Simulate an immune receptor repertoire with immuneSIM.
#'
#' The species, receptor and chain are fixed to "hs", "tr" and "b".
#' NOTE(review): presumably human T-cell receptor beta chain in immuneSIM's
#' coding -- confirm against the immuneSIM documentation.
#'
#' @param number_of_sequences Number of sequences to simulate; forwarded as
#'   `number_of_seqs`.
#' @return Whatever `immuneSIM()` returns for these settings.
generate_repertoire <- function(number_of_sequences) {
  immuneSIM(
    chain = "b",
    receptor = "tr",
    species = "hs",
    number_of_seqs = number_of_sequences
  )
}
|
2021-02-21 20:59:39 +01:00
|
|
|
|
2021-04-22 11:54:40 +02:00
|
|
|
#' Write the full sequences and the junction regions to FASTA files.
#'
#' Writes `data$sequence` to "data/sequence.fasta" and `data$junction` to
#' "data/HVR.fasta", creating the "data" directory when it does not exist.
#'
#' @param data An object with `sequence` and `junction` elements. Each may be
#'   an XStringSet already, or anything `Biostrings::DNAStringSet()` accepts
#'   (e.g. a character vector of nucleotide sequences).
#' @return Called for its side effect of writing the two files.
save_data <- function(data) {
  # writeXStringSet() only accepts XStringSet objects; a plain character
  # column must be converted first, an existing XStringSet is left untouched.
  as_string_set <- function(x) {
    if (methods::is(x, "XStringSet")) {
      x
    } else {
      Biostrings::DNAStringSet(x)
    }
  }

  # The output paths are relative to the working directory; make sure the
  # target directory is there before writing.
  dir.create("data", showWarnings = FALSE, recursive = TRUE)

  Biostrings::writeXStringSet(
    as_string_set(data$sequence),
    "data/sequence.fasta"
  )
  Biostrings::writeXStringSet(
    as_string_set(data$junction),
    "data/HVR.fasta"
  )
}
|
|
|
|
|
2021-04-21 19:59:38 +02:00
|
|
|
#' Reverse-complement the sequences and label them.
#'
#' Replaces `data$sequence` with the reverse complement of the original
#' sequences (as a DNAStringSet) and names each entry
#' "<row name> <v_call> <j_call>".
#'
#' @param data An object with `sequence`, `v_call` and `j_call` elements and
#'   row names (NOTE(review): presumably the data frame produced by
#'   immuneSIM -- confirm against the caller).
#' @return `data` with the `sequence` element replaced.
process_data <- function(data) {
  dna_sequence <- Biostrings::DNAStringSet(data$sequence)
  data$sequence <- Biostrings::reverseComplement(dna_sequence)

  # Use `sep` for the separator: passing " " as an extra positional element
  # would append it to every name and leave trailing whitespace.
  names(data$sequence) <- paste(
    rownames(data),
    data$v_call,
    data$j_call,
    sep = " "
  )

  data
}
|
2021-02-21 20:59:39 +01:00
|
|
|
|
2021-03-29 20:30:16 +02:00
|
|
|
#' Read the requested number of sequences from the command line.
#'
#' Expects exactly one trailing command-line argument, and that argument must
#' convert to a positive integer. Anything else stops with the usage message.
#'
#' @return The argument exactly as given on the command line (a character
#'   string); converting it to an integer is left to the caller.
parse_cli_arguments <- function() {
  usage <- "usage: repertoire.r <number of sequences>"

  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) != 1) {
    stop(usage, call. = FALSE)
  }

  # as.integer() turns unparseable text into NA with only a warning; check
  # here so a bad value is reported as a usage error rather than surfacing
  # later as an NA sequence count.
  count <- suppressWarnings(as.integer(args[1]))
  if (is.na(count) || count < 1L) {
    stop(usage, call. = FALSE)
  }

  args[1]
}
|
|
|
|
|
2021-04-21 19:59:38 +02:00
|
|
|
# Script entry point: read the sequence count from the command line, simulate
# the repertoire, post-process it, and write the FASTA files.
argument <- parse_cli_arguments()
repertoire <- generate_repertoire(as.integer(argument))
data <- process_data(repertoire)
save_data(data = data)
|