locigenesis/src/repertoire.r

38 lines
1.2 KiB
R
Raw Normal View History

library(immuneSIM)
library(Biostrings)
2021-03-02 20:08:14 +01:00
generate_repertoire <- function(number_of_sequences) {
2021-03-10 12:34:20 +01:00
return(immuneSIM(
2021-02-25 20:00:35 +01:00
number_of_seqs = number_of_sequences,
species = "hs",
receptor = "tr",
2021-03-10 12:34:20 +01:00
chain = "b"
))
2021-02-25 20:00:35 +01:00
}
save_data <- function(data, reads) {
2021-03-23 19:33:32 +01:00
Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta")
vdj_sequences <- data[-1]
amplified_vdj <- vdj_sequences[rep(seq_len(nrow(vdj_sequences)), reads), ]
write.csv(amplified_vdj, "data/vdj_metadata.csv", row.names = FALSE)
}
process_data <- function(repertoire, reads) {
columns <- c("sequence", "junction", "v_call", "j_call")
2021-03-11 21:03:16 +01:00
data <- repertoire[, columns]
dna_sequence <- Biostrings::DNAStringSet(data$sequence)
data$sequence <- Biostrings::reverseComplement(dna_sequence)
save_data(data, reads)
2021-02-26 02:20:11 +01:00
}
parse_cli_arguments <- function() {
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 2) {
stop("usage: repertoire.r <number of sequences> <sequencing runs>")
}
return(c(args[1], args[2]))
}
args <- parse_cli_arguments()
repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
process_data(repertoire = repertoire, reads = as.integer(args[2]))