locigenesis/src/repertoire.r

53 lines
1.4 KiB
R
Raw Normal View History

library(immuneSIM)
library(Biostrings)
2021-03-02 20:08:14 +01:00
#' Simulate an immune repertoire with immuneSIM.
#'
#' @param number_of_sequences Number of sequences to simulate; forwarded as
#'   `number_of_seqs`.
#' @return Whatever `immuneSIM()` returns for the requested settings.
generate_repertoire <- function(number_of_sequences) {
  # NOTE(review): "hs" / "tr" / "b" presumably select human, T-cell receptor,
  # beta chain -- confirm against the immuneSIM documentation.
  immuneSIM(
    chain = "b",
    receptor = "tr",
    species = "hs",
    number_of_seqs = number_of_sequences
  )
}
2021-03-11 21:03:16 +01:00
#' Repeat the values of one column `factor` times.
#'
#' The column named "sequence" is first converted to a DNAStringSet and
#' reverse-complemented; every other column is repeated unchanged.
#'
#' @param data Values of a single column.
#' @param column Name of the column `data` belongs to.
#' @param factor Repetition count handed to `rep()`.
#' @return The repeated values: a reverse-complemented DNAStringSet for the
#'   "sequence" column, otherwise the same type as `data`.
amplify_rows <- function(data, column, factor) {
  # Non-sequence columns need no transformation, only repetition.
  if (column != "sequence") {
    return(rep(data, factor))
  }
  sequences <- Biostrings::DNAStringSet(data)
  rep(Biostrings::reverseComplement(sequences), factor)
}
2021-03-23 19:33:32 +01:00
#' Write the processed data to disk.
#'
#' `data$sequence` is written to "data/sequence.fasta"; everything except the
#' first element of `data` is written as CSV to "data/vdj_alignment.csv".
#'
#' @param data A list-like object with a `sequence` element accepted by
#'   `Biostrings::writeXStringSet()`.
#' @return The value of the final `write.csv()` call.
save_data <- function(data) {
  # Dropping by position: assumes the first element is the sequence column.
  alignment_columns <- data[-1]
  Biostrings::writeXStringSet(data$sequence, filepath = "data/sequence.fasta")
  write.csv(
    alignment_columns,
    file = "data/vdj_alignment.csv",
    row.names = FALSE
  )
}
2021-03-11 21:03:16 +01:00
#' Select the sequence and V/D/J alignment columns, amplify them and save.
#'
#' @param repertoire Table containing at least the columns "sequence",
#'   "v_sequence_alignment", "d_sequence_alignment" and
#'   "j_sequence_alignment".
#' @param sequencing_runs Repetition count passed to `amplify_rows()` for
#'   every column.
#' @return The value returned by `save_data()`.
process_data <- function(repertoire, sequencing_runs) {
  columns <- c(
    "sequence", "v_sequence_alignment",
    "d_sequence_alignment", "j_sequence_alignment"
  )
  data <- repertoire[, columns]
  # SIMPLIFY = FALSE guarantees a list named after the columns. save_data()
  # relies on `$sequence` and `[-1]`, so the result must never be collapsed
  # into a matrix/vector by mapply's default simplification.
  amplified_data <- mapply(
    FUN = amplify_rows,
    data, names(data), sequencing_runs,
    SIMPLIFY = FALSE
  )
  save_data(amplified_data)
}
#' Check the command-line arguments and return the first two.
#'
#' @param args Vector of command-line arguments.
#' @return A vector holding `args[1]` and `args[2]`.
#' Stops with a usage message unless exactly two arguments are given.
parse_cli_arguments <- function(args) {
  expected_count <- 2
  if (length(args) == expected_count) {
    return(c(args[1], args[2]))
  }
  stop("usage: repertoire.r <number of sequences> <sequencing_runs>")
}
# Script entry point: read the two command-line values, simulate the
# repertoire, then amplify and save it.
args <- commandArgs(trailingOnly = TRUE)
arguments <- parse_cli_arguments(args)
number_of_sequences <- as.integer(arguments[1])
sequencing_runs <- as.integer(arguments[2])
# as.integer() turns non-numeric text into NA; stop here with a clear message
# rather than handing NA (or a negative count) to the simulation and rep().
if (anyNA(c(number_of_sequences, sequencing_runs)) ||
  number_of_sequences < 0L || sequencing_runs < 0L) {
  stop(
    "<number of sequences> and <sequencing_runs> ",
    "must be non-negative integers",
    call. = FALSE
  )
}
repertoire <- generate_repertoire(number_of_sequences)
process_data(repertoire, sequencing_runs)